[BugFix] Fix async scheduling + reasoning with struct output (#31332)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
2025-12-25 15:01:02 -08:00
parent f1531d9f2a
commit 81786c8774
3 changed files with 9 additions and 3 deletions
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -608,7 +608,7 @@ Make the response as short as possible.


@pytest.mark.parametrize(
-    "model_name, backend, tokenizer_mode, reasoning_parser, speculative_config",  # noqa: E501
+    "model_name, backend, tokenizer_mode, reasoning_parser, speculative_config, async_scheduling",  # noqa: E501
    [
        (
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
@@ -616,8 +616,10 @@ Make the response as short as possible.
            "auto",
            "deepseek_r1",
            NGRAM_SPEC_CONFIG,
+            False,
        ),
-        ("Qwen/Qwen3-1.7B", "xgrammar", "auto", "deepseek_r1", None),
+        ("Qwen/Qwen3-1.7B", "xgrammar", "auto", "deepseek_r1", None, False),
+        ("Qwen/Qwen3-1.7B", "xgrammar", "auto", "deepseek_r1", None, True),
    ],
 )
 def test_structured_output_with_reasoning_matrices(
@@ -626,6 +628,7 @@ def test_structured_output_with_reasoning_matrices(
    reasoning_parser: str,
    model_name: str,
    speculative_config: dict[str, Any] | None,
+    async_scheduling: bool,
 ):
    if current_platform.is_tpu() and speculative_config:
        pytest.skip("TPU does not support speculative decoding")
@@ -646,6 +649,7 @@ def test_structured_output_with_reasoning_matrices(
        ),
        tokenizer_mode=tokenizer_mode,
        speculative_config=speculative_config,
+        async_scheduling=async_scheduling,
    )
    tokenizer = llm.get_tokenizer()
    reasoner = ReasoningParserManager.get_reasoning_parser(reasoning_parser)(