[Core] Support logprobs with spec decode + async scheduling (#29223)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-11-25 12:55:24 -08:00
committed by GitHub
parent e7d776273d
commit 4e57c6587f
4 changed files with 35 additions and 25 deletions

View File

@@ -1089,8 +1089,6 @@ class Scheduler(SchedulerInterface):
and request.sampling_params.logprobs is not None
and logprobs
):
# NOTE: once we support N tokens per step (spec decode),
# the outer lists can be of length > 1.
new_logprobs = logprobs.slice(req_index, req_index + 1)
if new_token_ids and self.structured_output_manager.should_advance(request):