2025-04-30 00:32:40 +08:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2025-06-03 11:20:17 -07:00
|
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
2025-04-30 00:32:40 +08:00
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
from transformers import AutoTokenizer
|
|
|
|
|
|
2026-02-21 23:15:35 -08:00
|
|
|
from tests.reasoning.utils import (
|
|
|
|
|
StreamingReasoningReconstructor,
|
|
|
|
|
run_reasoning_extraction,
|
|
|
|
|
run_reasoning_extraction_streaming,
|
|
|
|
|
)
|
2026-02-27 04:30:45 +08:00
|
|
|
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
|
2025-04-30 00:32:40 +08:00
|
|
|
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
|
|
|
|
|
|
|
|
|
# Registry name of the reasoning parser under test.
parser_name = "qwen3"
# Delimiters the qwen3 chat template places around the reasoning section.
start_token = "<think>"
end_token = "</think>"

# Model names that parametrize the module-scoped qwen3_tokenizer fixture below.
REASONING_MODEL_NAMES = [
    "Qwen/Qwen3-0.6B",
    "Qwen/Qwen3.5-397B-A17B",
    "Qwen/Qwen3-4B-Thinking-2507",
]
@pytest.fixture(scope="module", params=REASONING_MODEL_NAMES)
def qwen3_tokenizer(request):
    """Load the tokenizer for each parametrized Qwen3-family model.

    Module scope keeps each tokenizer download/load to once per test run.
    """
    model_name = request.param
    return AutoTokenizer.from_pretrained(model_name)
# --- <think> in prompt, only </think> in output (typical) ---
WITHOUT_START_TOKEN = dict(
    output="This is a reasoning section</think>This is the rest",
    reasoning="This is a reasoning section",
    content="This is the rest",
)
WITHOUT_START_TOKEN_STREAM = dict(
    output="This is a reasoning section</think>This is the rest",
    reasoning="This is a reasoning section",
    content="This is the rest",
)
WITHOUT_START_TOKEN_COMPLETE_REASONING = dict(
    output="This is a reasoning section</think>",
    reasoning="This is a reasoning section",
    content=None,
)
# --- <think> present in output (old template / edge case) ---
WITH_THINK = dict(
    output="<think>This is a reasoning section</think>This is the rest",
    reasoning="This is a reasoning section",
    content="This is the rest",
)
WITH_THINK_STREAM = dict(
    output="<think>This is a reasoning section</think>This is the rest",
    reasoning="This is a reasoning section",
    content="This is the rest",
)
# --- No think tokens at all (thinking enabled, truncated) ---
# With thinking enabled (default), no think tokens means the output was
# truncated before </think> could be generated. All output is reasoning.
WITHOUT_THINK = dict(
    output="This is the rest",
    reasoning="This is the rest",
    content=None,
)
# In streaming, the parser cannot distinguish "thinking disabled" from
# "reasoning in progress" when no think tokens have appeared yet.
# It assumes reasoning. The serving layer handles the "thinking disabled"
# case by checking prompt_is_reasoning_end_arr before calling the parser.
WITHOUT_THINK_STREAM = dict(
    output="This is the rest",
    reasoning="This is the rest",
    content=None,
)
# --- Edge cases ---
COMPLETE_REASONING = dict(
    output="<think>This is a reasoning section</think>",
    reasoning="This is a reasoning section",
    content=None,
)
MULTILINE_REASONING = dict(
    output="<think>This is a reasoning\nsection</think>This is the rest\nThat",
    reasoning="This is a reasoning\nsection",
    content="This is the rest\nThat",
)
# Truncated output: <think> present but no </think> (thinking enabled).
# Everything is reasoning because the output was cut off mid-thought.
ONLY_OPEN_TAG = dict(
    output="<think>This is a reasoning section",
    reasoning="This is a reasoning section",
    content=None,
)
ONLY_OPEN_TAG_STREAM = dict(
    output="<think>This is a reasoning section",
    reasoning="This is a reasoning section",
    content=None,
)
# Truncated output without <think> prefix (Qwen3.5 style where <think>
# is in the prompt). No </think> means truncation — all is reasoning.
TRUNCATED_NO_START_TOKEN = dict(
    output="This is a reasoning section",
    reasoning="This is a reasoning section",
    content=None,
)
TRUNCATED_NO_START_TOKEN_STREAM = dict(
    output="This is a reasoning section",
    reasoning="This is a reasoning section",
    content=None,
)
# (streaming?, case-dict, pytest id) triples expanded into pytest.param
# objects, in the same order as the original hand-written list.
TEST_CASES = [
    pytest.param(streaming, case, id=case_id)
    for streaming, case, case_id in [
        (False, WITHOUT_START_TOKEN, "without_start_token"),
        (True, WITHOUT_START_TOKEN_STREAM, "without_start_token_stream"),
        (
            False,
            WITHOUT_START_TOKEN_COMPLETE_REASONING,
            "without_start_token_complete_reasoning",
        ),
        (
            True,
            WITHOUT_START_TOKEN_COMPLETE_REASONING,
            "without_start_token_complete_reasoning_stream",
        ),
        (False, WITH_THINK, "with_think"),
        (True, WITH_THINK_STREAM, "with_think_stream"),
        (False, WITHOUT_THINK, "without_think"),
        (True, WITHOUT_THINK_STREAM, "without_think_stream"),
        (False, COMPLETE_REASONING, "complete_reasoning"),
        (True, COMPLETE_REASONING, "complete_reasoning_stream"),
        (False, MULTILINE_REASONING, "multiline_reasoning"),
        (True, MULTILINE_REASONING, "multiline_reasoning_stream"),
        (False, ONLY_OPEN_TAG, "only_open_tag"),
        (True, ONLY_OPEN_TAG_STREAM, "only_open_tag_stream"),
        (False, TRUNCATED_NO_START_TOKEN, "truncated_no_start_token"),
        (True, TRUNCATED_NO_START_TOKEN_STREAM, "truncated_no_start_token_stream"),
    ]
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    qwen3_tokenizer,
):
    """Run one extraction case through the qwen3 parser, batch or streaming."""
    # Re-detokenize token-by-token so the streaming path sees realistic
    # per-token text deltas rather than one big string.
    raw_tokens = qwen3_tokenizer.tokenize(param_dict["output"])
    output_tokens: list[str] = []
    for tok in raw_tokens:
        output_tokens.append(qwen3_tokenizer.convert_tokens_to_string([tok]))

    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(qwen3_tokenizer)

    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
# Multi-token delta tests: simulate real-world streaming where a single
# delta can contain multiple tokens (e.g., speculative decoding).
MULTI_TOKEN_DELTA_CASES = [
    pytest.param(deltas, expected_reasoning, expected_content, id=case_id)
    for deltas, expected_reasoning, expected_content, case_id in [
        # <think> grouped with following text in one delta
        (
            ["<think>This is a reasoning section", "</think>", "This is the rest"],
            "This is a reasoning section",
            "This is the rest",
            "start_token_grouped_with_text",
        ),
        # </think> grouped with following content in one delta
        (
            ["reasoning section", "</think>This is the rest"],
            "reasoning section",
            "This is the rest",
            "end_token_grouped_with_content",
        ),
        # <think> and </think> in the same delta, no content after
        (
            ["<think>reasoning</think>"],
            "reasoning",
            None,
            "start_and_end_in_one_delta_no_content",
        ),
        # No start token, end grouped with content (Qwen3.5 style)
        (
            ["reasoning section", "</think>content"],
            "reasoning section",
            "content",
            "no_start_end_grouped_with_content",
        ),
    ]
]
@pytest.mark.parametrize(
    "deltas, expected_reasoning, expected_content", MULTI_TOKEN_DELTA_CASES
)
def test_reasoning_streaming_multi_token_deltas(
    deltas: list[str],
    expected_reasoning: str | None,
    expected_content: str | None,
    qwen3_tokenizer,
):
    """Test that multi-token deltas don't leak <think> into reasoning."""
    make_parser = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = make_parser(qwen3_tokenizer)

    reconstructor: StreamingReasoningReconstructor = run_reasoning_extraction_streaming(
        parser, deltas
    )

    assert reconstructor.reasoning == expected_reasoning
    # Normalize "" to None before comparing against the expected content.
    observed_content = reconstructor.other_content or None
    assert observed_content == expected_content
# --- Tests for enable_thinking=False (thinking explicitly disabled) ---
# With thinking disabled the expected reasoning is always None and the
# entire output is expected back as content, so cases are just (text, id).
THINKING_DISABLED_CASES = [
    pytest.param(text, None, text, id=case_id)
    for text, case_id in [
        ("This is plain content", "thinking_disabled_plain_content"),
        ("Some output without think tokens", "thinking_disabled_no_think_tokens"),
    ]
]
@pytest.mark.parametrize(
    "output, expected_reasoning, expected_content", THINKING_DISABLED_CASES
)
def test_reasoning_thinking_disabled(
    output: str,
    expected_reasoning: str | None,
    expected_content: str | None,
    qwen3_tokenizer,
):
    """When enable_thinking=False, output without </think> is all content."""
    make_parser = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = make_parser(
        qwen3_tokenizer,
        chat_template_kwargs={"enable_thinking": False},
    )

    request = ChatCompletionRequest(messages=[], model="test-model")
    reasoning, content = parser.extract_reasoning(
        model_output=output,
        request=request,
    )

    assert (reasoning, content) == (expected_reasoning, expected_content)