[AsyncScheduling] Make async overlap work with logprobs (#27615)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-10-28 15:35:54 -07:00
committed by GitHub
parent 111faf1118
commit 4fe5895361
4 changed files with 65 additions and 10 deletions

View File

@@ -831,8 +831,9 @@ class VllmRunner:
images: PromptImageInput | None = None,
videos: PromptVideoInput | None = None,
audios: PromptAudioInput | None = None,
return_logprobs: bool = False,
**kwargs: Any,
) -> list[tuple[list[list[int]], list[str]]]:
) -> list[tuple[list[list[int]], list[str]]] | tuple[list, list]:
inputs = self.get_inputs(prompts, images=images, videos=videos, audios=audios)
req_outputs = self.llm.generate(
@@ -840,18 +841,23 @@ class VllmRunner:
)
outputs: list[tuple[list[list[int]], list[str]]] = []
logprobs = []
for req_output in req_outputs:
prompt_str = req_output.prompt
prompt_ids = req_output.prompt_token_ids
req_sample_output_ids: list[list[int]] = []
req_sample_output_strs: list[str] = []
req_logprobs = []
for sample in req_output.outputs:
output_str = sample.text
output_ids = list(sample.token_ids)
req_sample_output_ids.append(prompt_ids + output_ids)
req_sample_output_strs.append((prompt_str or "") + output_str)
if sample.logprobs:
req_logprobs.extend(sample.logprobs)
outputs.append((req_sample_output_ids, req_sample_output_strs))
return outputs
logprobs.append(req_logprobs)
return outputs if not return_logprobs else (outputs, logprobs)
@staticmethod
def _final_steps_generate_w_logprobs(