[Bugfix] Spec decode + structured output + spec model max len edge case (#28298)
Signed-off-by: Andy Lo <andy@mistral.ai>
This commit is contained in:
@@ -325,6 +325,9 @@ class Scheduler(SchedulerInterface):
|
||||
scheduled_spec_decode_tokens[request.request_id] = (
|
||||
request.spec_token_ids
|
||||
)
|
||||
+ # New spec tokens will be set in `update_draft_token_ids` before the
|
||||
+ # next step when applicable.
|
||||
+ request.spec_token_ids = []
|
||||
|
||||
# Encoder-related.
|
||||
if encoder_inputs_to_schedule:
|
||||
@@ -1149,10 +1152,7 @@ class Scheduler(SchedulerInterface):
|
||||
continue
|
||||
|
||||
# Add newly generated spec token ids to the request.
|
||||
- if not spec_token_ids:
|
||||
- # NOTE(woosuk): request.spec_token_ids should be updated.
|
||||
- request.spec_token_ids.clear()
|
||||
- elif self.structured_output_manager.should_advance(request):
|
||||
+ if self.structured_output_manager.should_advance(request):
|
||||
metadata = request.structured_output_request
|
||||
request.spec_token_ids = metadata.grammar.validate_tokens( # type: ignore[union-attr]
|
||||
spec_token_ids
|
||||
|
||||
Reference in New Issue
Block a user