From bc46be5daf5654b343bb94cdc5ea755d42bbba01 Mon Sep 17 00:00:00 2001
From: khluu
Date: Tue, 10 Mar 2026 11:47:09 -0700
Subject: [PATCH] Revert "add nemotron v3 reasoning parser (#36393)"

This reverts commit 8e39d39fd4e0a5d6cdbc3c86df8080a50c49164b.
---
 .../test_nemotron_v3_reasoning_parser.py      | 150 ------------------
 vllm/reasoning/__init__.py                    |   4 -
 .../reasoning/nemotron_v3_reasoning_parser.py |  32 ----
 3 files changed, 186 deletions(-)
 delete mode 100644 tests/reasoning/test_nemotron_v3_reasoning_parser.py
 delete mode 100644 vllm/reasoning/nemotron_v3_reasoning_parser.py

diff --git a/tests/reasoning/test_nemotron_v3_reasoning_parser.py b/tests/reasoning/test_nemotron_v3_reasoning_parser.py
deleted file mode 100644
index 3fe383a08..000000000
--- a/tests/reasoning/test_nemotron_v3_reasoning_parser.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from typing import TypedDict
-
-import pytest
-import regex as re
-
-from tests.reasoning.utils import run_reasoning_extraction
-from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
-from vllm.reasoning import ReasoningParser, ReasoningParserManager
-
-parser_name = "nemotron_v3"
-
-
-class ReasoningCase(TypedDict):
-    output: str
-    reasoning: str | None
-    content: str | None
-
-
-class FakeNemotronTokenizer:
-    def __init__(self):
-        self._vocab = {
-            "<think>": 1,
-            "</think>": 2,
-        }
-        self._pattern = re.compile(r"(<think>|</think>)")
-
-    def get_vocab(self) -> dict[str, int]:
-        return self._vocab
-
-    def tokenize(self, text: str) -> list[str]:
-        tokens: list[str] = []
-        for part in self._pattern.split(text):
-            if part:
-                tokens.append(part)
-        return tokens
-
-    def convert_tokens_to_string(self, tokens: list[str]) -> str:
-        return "".join(tokens)
-
-
-@pytest.fixture
-def tokenizer():
-    return FakeNemotronTokenizer()
-
-
-@pytest.mark.parametrize(
-    "streaming,param_dict",
-    [
-        pytest.param(
-            False,
-            {
-                "output": "This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="without_start_token",
-        ),
-        pytest.param(
-            True,
-            {
-                "output": "This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="without_start_token_streaming",
-        ),
-        pytest.param(
-            False,
-            {
-                "output": "<think>This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="with_start_token",
-        ),
-        pytest.param(
-            True,
-            {
-                "output": "<think>This is a reasoning section</think>This is the rest",
-                "reasoning": "This is a reasoning section",
-                "content": "This is the rest",
-            },
-            id="with_start_token_streaming",
-        ),
-    ],
-)
-def test_nemotron_v3_reasoning(
-    tokenizer: FakeNemotronTokenizer,
-    streaming: bool,
-    param_dict: ReasoningCase,
-):
-    output = tokenizer.tokenize(param_dict["output"])
-    model_output = [tokenizer.convert_tokens_to_string([token]) for token in output]
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
-        tokenizer
-    )
-
-    reasoning, content = run_reasoning_extraction(
-        parser, model_output, streaming=streaming
-    )
-
-    assert reasoning == param_dict["reasoning"]
-    assert content == param_dict["content"]
-
-
-def test_nemotron_v3_without_thinking_returns_content(
-    tokenizer: FakeNemotronTokenizer,
-):
-    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
-    parser = parser_cls(tokenizer)
-    request = ChatCompletionRequest(
-        model="test-model",
-        messages=[],
-        chat_template_kwargs={"enable_thinking": False},
-    )
-
-    reasoning, content = run_reasoning_extraction(
-        parser,
-        ["This is plain content"],
-        request=request,
-        streaming=False,
-    )
-
-    assert reasoning is None
-    assert content == "This is plain content"
-
-
-def test_nemotron_v3_with_thinking_keeps_truncated_reasoning(
-    tokenizer: FakeNemotronTokenizer,
-):
-    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
-    parser = parser_cls(tokenizer)
-    request = ChatCompletionRequest(
-        model="test-model",
-        messages=[],
-        chat_template_kwargs={"enable_thinking": True},
-    )
-
-    reasoning, content = run_reasoning_extraction(
-        parser,
-        ["This is truncated reasoning"],
-        request=request,
-        streaming=False,
-    )
-
-    assert reasoning == "This is truncated reasoning"
-    assert content is None
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index 8c78db6f1..df75e8584 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -68,10 +68,6 @@ _REASONING_PARSERS_TO_REGISTER = {
         "mistral_reasoning_parser",
         "MistralReasoningParser",
     ),
-    "nemotron_v3": (
-        "nemotron_v3_reasoning_parser",
-        "NemotronV3ReasoningParser",
-    ),
     "olmo3": (
         "olmo3_reasoning_parser",
         "Olmo3ReasoningParser",
diff --git a/vllm/reasoning/nemotron_v3_reasoning_parser.py b/vllm/reasoning/nemotron_v3_reasoning_parser.py
deleted file mode 100644
index a929793bf..000000000
--- a/vllm/reasoning/nemotron_v3_reasoning_parser.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionRequest,
-)
-from vllm.entrypoints.openai.responses.protocol import (
-    ResponsesRequest,
-)
-from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
-
-
-class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
-    """
-    Reasoning parser for Nemotron V3 models.
-    """
-
-    def extract_reasoning(
-        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
-    ) -> tuple[str | None, str | None]:
-        reasoning_content, final_content = super().extract_reasoning(
-            model_output, request
-        )
-        chat_template_kwargs = getattr(request, "chat_template_kwargs", None)
-
-        if (
-            chat_template_kwargs
-            and chat_template_kwargs.get("enable_thinking") is False
-            and final_content is None
-        ):
-            reasoning_content, final_content = final_content, reasoning_content
-
-        return reasoning_content, final_content