From bc46be5daf5654b343bb94cdc5ea755d42bbba01 Mon Sep 17 00:00:00 2001
From: khluu
Date: Tue, 10 Mar 2026 11:47:09 -0700
Subject: [PATCH] Revert "add nemotron v3 reasoning parser (#36393)"

This reverts commit 8e39d39fd4e0a5d6cdbc3c86df8080a50c49164b.
---
 .../test_nemotron_v3_reasoning_parser.py      | 150 ------------------
 vllm/reasoning/__init__.py                    |   4 -
 .../reasoning/nemotron_v3_reasoning_parser.py |  32 ----
 3 files changed, 186 deletions(-)
 delete mode 100644 tests/reasoning/test_nemotron_v3_reasoning_parser.py
 delete mode 100644 vllm/reasoning/nemotron_v3_reasoning_parser.py

diff --git a/tests/reasoning/test_nemotron_v3_reasoning_parser.py b/tests/reasoning/test_nemotron_v3_reasoning_parser.py
deleted file mode 100644
index 3fe383a08..000000000
--- a/tests/reasoning/test_nemotron_v3_reasoning_parser.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from typing import TypedDict
-
-import pytest
-import regex as re
-
-from tests.reasoning.utils import run_reasoning_extraction
-from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
-from vllm.reasoning import ReasoningParser, ReasoningParserManager
-
-parser_name = "nemotron_v3"
-
-
-class ReasoningCase(TypedDict):
-    output: str
-    reasoning: str | None
-    content: str | None
-
-
-class FakeNemotronTokenizer:
-    def __init__(self):
-        self._vocab = {
-            "<think>": 1,
-            "</think>": 2,
-        }
-        self._pattern = re.compile(r"(<think>|</think>)")
-
-    def get_vocab(self) -> dict[str, int]:
-        return self._vocab
-
-    def tokenize(self, text: str) -> list[str]:
-        tokens: list[str] = []
-        for part in self._pattern.split(text):
-            if part:
-                tokens.append(part)
-        return tokens
-
-    def convert_tokens_to_string(self, tokens: list[str]) -> str:
-        return "".join(tokens)
-
-
-@pytest.fixture
-def tokenizer():
-    return FakeNemotronTokenizer()
-
-
-@pytest.mark.parametrize(
-    "streaming,param_dict",
-    [
-        pytest.param(
-            False,
-            {
-                "output": "This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="without_start_token",
-        ),
-        pytest.param(
-            True,
-            {
-                "output": "This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="without_start_token_streaming",
-        ),
-        pytest.param(
-            False,
-            {
-                "output": "<think>This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="with_start_token",
-        ),
-        pytest.param(
-            True,
-            {
-                "output": "<think>This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="with_start_token_streaming",
-        ),
-    ],
-)
-def test_nemotron_v3_reasoning(
-    tokenizer: FakeNemotronTokenizer,
-    streaming: bool,
-    param_dict: ReasoningCase,
-):
-    output = tokenizer.tokenize(param_dict["output"])
-    model_output = [tokenizer.convert_tokens_to_string([token]) for token in output]
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
-        tokenizer
-    )
-
-    reasoning, content = run_reasoning_extraction(
-        parser, model_output, streaming=streaming
-    )
-
-    assert reasoning == param_dict["reasoning"]
-    assert content == param_dict["content"]
-
-
-def test_nemotron_v3_without_thinking_returns_content(
-    tokenizer: FakeNemotronTokenizer,
-):
-    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
-    parser = parser_cls(tokenizer)
-    request = ChatCompletionRequest(
-        model="test-model",
-        messages=[],
-        chat_template_kwargs={"enable_thinking": False},
-    )
-
-    reasoning, content = run_reasoning_extraction(
-        parser,
-        ["This is plain content"],
-        request=request,
-        streaming=False,
-    )
-
-    assert reasoning is None
-    assert content == "This is plain content"
-
-
-def test_nemotron_v3_with_thinking_keeps_truncated_reasoning(
-    tokenizer: FakeNemotronTokenizer,
-):
-    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
-    parser = parser_cls(tokenizer)
-    request = ChatCompletionRequest(
-        model="test-model",
-        messages=[],
-        chat_template_kwargs={"enable_thinking": True},
-    )
-
-    reasoning, content = run_reasoning_extraction(
-        parser,
-        ["This is truncated reasoning"],
-        request=request,
-        streaming=False,
-    )
-
-    assert reasoning == "This is truncated reasoning"
-    assert content is None
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index 8c78db6f1..df75e8584 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -68,10 +68,6 @@ _REASONING_PARSERS_TO_REGISTER = {
         "mistral_reasoning_parser",
         "MistralReasoningParser",
     ),
-    "nemotron_v3": (
-        "nemotron_v3_reasoning_parser",
-        "NemotronV3ReasoningParser",
-    ),
     "olmo3": (
         "olmo3_reasoning_parser",
         "Olmo3ReasoningParser",
diff --git a/vllm/reasoning/nemotron_v3_reasoning_parser.py b/vllm/reasoning/nemotron_v3_reasoning_parser.py
deleted file mode 100644
index a929793bf..000000000
--- a/vllm/reasoning/nemotron_v3_reasoning_parser.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionRequest,
-)
-from vllm.entrypoints.openai.responses.protocol import (
-    ResponsesRequest,
-)
-from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
-
-
-class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
-    """
-    Reasoning parser for Nemotron V3 models.
-    """
-
-    def extract_reasoning(
-        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
-    ) -> tuple[str | None, str | None]:
-        reasoning_content, final_content = super().extract_reasoning(
-            model_output, request
-        )
-        chat_template_kwargs = getattr(request, "chat_template_kwargs", None)
-
-        if (
-            chat_template_kwargs
-            and chat_template_kwargs.get("enable_thinking") is False
-            and final_content is None
-        ):
-            reasoning_content, final_content = final_content, reasoning_content
-
-        return reasoning_content, final_content