[Feature][Frontend]: Deprecate --enable-reasoning (#17452)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
2025-05-01 21:46:16 +08:00
parent f5a3c655b2
commit 98060b001d
16 changed files with 49 additions and 91 deletions
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -967,7 +967,6 @@ async def init_app_state(
        return_tokens_as_token_ids=args.return_tokens_as_token_ids,
        enable_auto_tools=args.enable_auto_tool_choice,
        tool_parser=args.tool_call_parser,
-        enable_reasoning=args.enable_reasoning,
        reasoning_parser=args.reasoning_parser,
        enable_prompt_tokens_details=args.enable_prompt_tokens_details,
    ) if model_config.runner_type == "generate" else None
@@ -1053,7 +1052,7 @@ async def run_server(args, **uvicorn_kwargs) -> None:
                       f"(chose from {{ {','.join(valid_tool_parses)} }})")

    valid_reasoning_parses = ReasoningParserManager.reasoning_parsers.keys()
-    if args.enable_reasoning \
+    if args.reasoning_parser \
        and args.reasoning_parser not in valid_reasoning_parses:
        raise KeyError(
            f"invalid reasoning parser: {args.reasoning_parser} "
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -284,11 +284,6 @@ def validate_parsed_serve_args(args: argparse.Namespace):
        raise TypeError("Error: --enable-auto-tool-choice requires "
                        "--tool-call-parser")

-    # Enable reasoning needs a reasoning parser to be valid
-    if args.enable_reasoning and not args.reasoning_parser:
-        raise TypeError("Error: --enable-reasoning requires "
-                        "--reasoning-parser")
-

 def create_parser_for_docs() -> FlexibleArgumentParser:
    parser_for_docs = FlexibleArgumentParser(
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -58,8 +58,7 @@ class OpenAIServingChat(OpenAIServing):
        chat_template: Optional[str],
        chat_template_content_format: ChatTemplateContentFormatOption,
        return_tokens_as_token_ids: bool = False,
-        enable_reasoning: bool = False,
-        reasoning_parser: Optional[str] = None,
+        reasoning_parser: str = "",
        enable_auto_tools: bool = False,
        tool_parser: Optional[str] = None,
        enable_prompt_tokens_details: bool = False,
@@ -82,18 +81,17 @@ class OpenAIServingChat(OpenAIServing):
                " the parallel_tool_calls client option is preset for "
                "compatibility reasons, it will be ignored.")

-        self.enable_reasoning: bool = enable_reasoning
        self.reasoning_parser: Optional[Callable[[AnyTokenizer],
                                                 ReasoningParser]] = None
-        if self.enable_reasoning:
+        if reasoning_parser:
            try:
                self.reasoning_parser = (
                    ReasoningParserManager.get_reasoning_parser(
                        reasoning_parser))
+                assert self.reasoning_parser is not None
            except Exception as e:
-                raise TypeError("Error: --enable-reasoning requires "
-                                f"reasoning_parser:'{reasoning_parser}' "
-                                "which has not been registered") from e
+                raise TypeError(
+                    f"{reasoning_parser=} has not been registered") from e
        self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
        if self.enable_auto_tools:
            try:
@@ -423,15 +421,12 @@ class OpenAIServingChat(OpenAIServing):
            not tool_choice_function_name
            and self._should_stream_with_auto_tool_parsing(request))

-        should_stream_with_reasoning_parsing = (
-            self._should_stream_with_reasoning_parsing(request))
-
        all_previous_token_ids: Optional[list[list[int]]]
        function_name_returned: Optional[list[bool]] = None

        # Only one of these will be used, thus previous_texts and
        # all_previous_token_ids will not be used twice in the same iteration.
-        if tool_choice_auto or should_stream_with_reasoning_parsing:
+        if tool_choice_auto or self.reasoning_parser:
            # These are only required in "auto" tool choice case
            previous_texts = [""] * num_choices
            all_previous_token_ids = [[]] * num_choices
@@ -446,12 +441,7 @@ class OpenAIServingChat(OpenAIServing):
            previous_texts, all_previous_token_ids = None, None

        try:
-            # There is no need to check if the reasoning_parser is None
-            # because the should_stream_with_reasoning_parsing check
-            # already ensures that the reasoning_parser is not None.
-            # but the pre-commit hook requires it.
-            if should_stream_with_reasoning_parsing and \
-                self.reasoning_parser is not None:
+            if self.reasoning_parser:
                reasoning_parser = self.reasoning_parser(tokenizer)
        except RuntimeError as e:
            logger.exception("Error in reasoning parser creation.")
@@ -459,7 +449,6 @@ class OpenAIServingChat(OpenAIServing):
            yield f"data: {data}\n\n"
            yield "data: [DONE]\n\n"
            return
-
        # Prepare the tool parser if it's needed
        try:
            if tool_choice_auto and self.tool_parser:
@@ -592,7 +581,7 @@ class OpenAIServingChat(OpenAIServing):
                    delta_message: Optional[DeltaMessage]

                    # just update previous_texts and previous_token_ids
-                    if tool_choice_auto or should_stream_with_reasoning_parsing:
+                    if tool_choice_auto or self.reasoning_parser:
                        assert previous_texts is not None
                        assert all_previous_token_ids is not None
                        previous_text = previous_texts[i]
@@ -603,7 +592,7 @@ class OpenAIServingChat(OpenAIServing):

                    # handle streaming deltas for tools with named tool_choice
                    if tool_choice_function_name:
-                        if (self.enable_reasoning
+                        if (self.reasoning_parser
                                and not reasoning_parser.is_reasoning_end(
                                    previous_token_ids)):
                            assert reasoning_parser is not None
@@ -630,7 +619,7 @@ class OpenAIServingChat(OpenAIServing):
                                    current_text = ""
                        else:
                            # Just to add remaining `content`
-                            if self.enable_reasoning:
+                            if self.reasoning_parser:
                                delta_text = previous_text + delta_text
                                current_text = ""

@@ -660,7 +649,7 @@ class OpenAIServingChat(OpenAIServing):

                    # handle streaming deltas for tools with "auto" tool choice
                    # and reasoning parser
-                    elif tool_choice_auto and self.enable_reasoning:
+                    elif tool_choice_auto and self.reasoning_parser:
                        assert tool_parser is not None
                        assert reasoning_parser is not None
                        assert added_content_delta_arr is not None
@@ -728,8 +717,7 @@ class OpenAIServingChat(OpenAIServing):
                                delta_token_ids=output.token_ids,
                                request=request))
                    # when only reasoning
-                    elif self.enable_reasoning:
-                        assert reasoning_parser is not None
+                    elif self.reasoning_parser:
                        delta_message = (reasoning_parser.
                                         extract_reasoning_content_streaming(
                                             previous_text,
@@ -744,7 +732,7 @@ class OpenAIServingChat(OpenAIServing):
                        delta_message = DeltaMessage(content=delta_text)

                    # update the previous values for the next iteration
-                    if tool_choice_auto or should_stream_with_reasoning_parsing:
+                    if tool_choice_auto or self.reasoning_parser:
                        assert previous_texts is not None
                        assert all_previous_token_ids is not None
                        previous_texts[i] = current_text
@@ -931,17 +919,9 @@ class OpenAIServingChat(OpenAIServing):
                )
            else:
                logprobs = None
-
-            should_stream_with_reasoning_parsing = (
-                self._should_stream_with_reasoning_parsing(request))
-
-            # In the OpenAI API the finish_reason is "tools_called"
-            # if the tool choice is auto and the model produced a tool
-            # call. The same is not true for named function calls
            auto_tools_called = False

-            if should_stream_with_reasoning_parsing and \
-                self.reasoning_parser is not None:
+            if self.reasoning_parser:
                try:
                    reasoning_parser = self.reasoning_parser(tokenizer)
                except RuntimeError as e:
@@ -1176,17 +1156,6 @@ class OpenAIServingChat(OpenAIServing):
        return (request.tools and self.tool_parser and self.enable_auto_tools
                and request.tool_choice in ['auto', None])

-    def _should_stream_with_reasoning_parsing(self,
-                                              request: ChatCompletionRequest):
-        """
-            Utility function to check if streamed tokens should go through the
-            reasoning parser that was configured.
-    
-            We only want to do this IF reasoning is enabled and a reasoning 
-            parser is configured.
-            """
-        return self.enable_reasoning and self.reasoning_parser is not None
-
    def _should_check_for_unstreamed_tool_arg_tokens(
        self,
        delta_message: Optional[DeltaMessage],