[Structured Output][Reasoning] Improves decoding throughput for models using single-token reasoning endings. (#30056)
This commit is contained in:
committed by
GitHub
parent
67475a6e81
commit
c72ea10723
@@ -70,6 +70,7 @@ class TestReasoningStructuredOutput:
|
||||
request.use_structured_output = True
|
||||
request.prompt_token_ids = [1, 2, 3, 4, 5]
|
||||
request.all_token_ids = [1, 2, 3, 4, 5, 6, 7, 8]
|
||||
request.num_computed_tokens = 5
|
||||
return request
|
||||
|
||||
def test_should_fill_bitmask_with_enable_in_reasoning(
|
||||
|
||||
Reference in New Issue
Block a user