[Bugfix] Spec decode + structured output + spec model max len edge case (#28298)

Signed-off-by: Andy Lo <andy@mistral.ai>
This commit is contained in:
Andy Lo
2025-11-08 19:44:25 +00:00
committed by GitHub
parent 26990d25dc
commit 47604137a2
3 changed files with 36 additions and 8 deletions

View File

@@ -325,6 +325,9 @@ class Scheduler(SchedulerInterface):
scheduled_spec_decode_tokens[request.request_id] = (
request.spec_token_ids
)
# New spec tokens will be set in `update_draft_token_ids` before the
# next step when applicable.
request.spec_token_ids = []
# Encoder-related.
if encoder_inputs_to_schedule:
@@ -1149,10 +1152,7 @@ class Scheduler(SchedulerInterface):
continue
# Add newly generated spec token ids to the request.
if not spec_token_ids:
# NOTE(woosuk): request.spec_token_ids should be updated.
request.spec_token_ids.clear()
elif self.structured_output_manager.should_advance(request):
if self.structured_output_manager.should_advance(request):
metadata = request.structured_output_request
request.spec_token_ids = metadata.grammar.validate_tokens( # type: ignore[union-attr]
spec_token_ids