[Structured Output][Reasoning] Improves decoding throughput for models using single-token reasoning endings. (#30056)

This commit is contained in:
Hubert de La Jonquiere
2025-12-09 11:54:08 +01:00
committed by GitHub
parent 67475a6e81
commit c72ea10723
10 changed files with 89 additions and 1 deletion

View File

@@ -70,6 +70,7 @@ class TestReasoningStructuredOutput:
request.use_structured_output = True
request.prompt_token_ids = [1, 2, 3, 4, 5]
request.all_token_ids = [1, 2, 3, 4, 5, 6, 7, 8]
request.num_computed_tokens = 5
return request
def test_should_fill_bitmask_with_enable_in_reasoning(