[BugFix] Fix async scheduling + reasoning with structured output (#31332)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -608,7 +608,7 @@ Make the response as short as possible.
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model_name, backend, tokenizer_mode, reasoning_parser, speculative_config", # noqa: E501
|
||||
"model_name, backend, tokenizer_mode, reasoning_parser, speculative_config, async_scheduling", # noqa: E501
|
||||
[
|
||||
(
|
||||
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
@@ -616,8 +616,10 @@ Make the response as short as possible.
|
||||
"auto",
|
||||
"deepseek_r1",
|
||||
NGRAM_SPEC_CONFIG,
|
||||
False,
|
||||
),
|
||||
("Qwen/Qwen3-1.7B", "xgrammar", "auto", "deepseek_r1", None),
|
||||
("Qwen/Qwen3-1.7B", "xgrammar", "auto", "deepseek_r1", None, False),
|
||||
("Qwen/Qwen3-1.7B", "xgrammar", "auto", "deepseek_r1", None, True),
|
||||
],
|
||||
)
|
||||
def test_structured_output_with_reasoning_matrices(
|
||||
@@ -626,6 +628,7 @@ def test_structured_output_with_reasoning_matrices(
|
||||
reasoning_parser: str,
|
||||
model_name: str,
|
||||
speculative_config: dict[str, Any] | None,
|
||||
async_scheduling: bool,
|
||||
):
|
||||
if current_platform.is_tpu() and speculative_config:
|
||||
pytest.skip("TPU does not support speculative decoding")
|
||||
@@ -646,6 +649,7 @@ def test_structured_output_with_reasoning_matrices(
|
||||
),
|
||||
tokenizer_mode=tokenizer_mode,
|
||||
speculative_config=speculative_config,
|
||||
async_scheduling=async_scheduling,
|
||||
)
|
||||
tokenizer = llm.get_tokenizer()
|
||||
reasoner = ReasoningParserManager.get_reasoning_parser(reasoning_parser)(
|
||||
|
||||
Reference in New Issue
Block a user