[Refactor] [11/N] to simplify the MCP architecture (#32396)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey
2026-01-15 18:49:31 +08:00
committed by GitHub
parent 3a4e10c847
commit 707b44cc28
16 changed files with 40 additions and 38 deletions

View File

@@ -7,7 +7,7 @@ import pytest_asyncio
from openai import OpenAI
from openai_harmony import ToolDescription, ToolNamespaceConfig
from vllm.entrypoints.tool_server import MCPToolServer
from vllm.entrypoints.mcp.tool_server import MCPToolServer
from ....utils import RemoteOpenAIServer

View File

@@ -8,7 +8,7 @@ from unittest.mock import Mock
import pytest
from vllm.entrypoints.tool_server import ToolServer
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.reasoning.gptoss_reasoning_parser import (
GptOssReasoningParser,
)

View File

@@ -13,15 +13,15 @@ from openai.types.responses.tool import (
Tool,
)
from vllm.entrypoints.context import ConversationContext
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.responses.context import ConversationContext
from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
from vllm.entrypoints.openai.responses.serving import (
OpenAIServingResponses,
_extract_allowed_tools_from_mcp_requests,
extract_tool_types,
)
from vllm.entrypoints.tool_server import ToolServer
from vllm.inputs.data import TokensPrompt

View File

@@ -6,7 +6,7 @@ from unittest.mock import MagicMock, patch
import pytest
from openai_harmony import Author, Message, Role, StreamState, TextContent
from vllm.entrypoints.context import (
from vllm.entrypoints.openai.responses.context import (
HarmonyContext,
StreamingHarmonyContext,
TurnMetrics,
@@ -71,7 +71,7 @@ async def generate_mock_outputs(
def mock_parser():
"""Set up a mock parser for tests."""
with patch(
"vllm.entrypoints.context.get_streamable_parser_for_assistant"
"vllm.entrypoints.openai.responses.context.get_streamable_parser_for_assistant"
) as mock_parser_factory:
# Create a mock parser object
parser = MagicMock()
@@ -284,7 +284,7 @@ async def test_negative_tool_tokens_edge_case():
"""Test edge case where calculation could result in negative tool
tokens. We should log an error and clamp the value to 0."""
# Use patch to check if logger.error was called
with patch("vllm.entrypoints.context.logger.error") as mock_log:
with patch("vllm.entrypoints.openai.responses.context.logger.error") as mock_log:
context = HarmonyContext(messages=[], available_tools=["browser"])
# First turn

View File

@@ -16,7 +16,7 @@ from openai.types.responses.response_reasoning_item import (
)
from vllm.entrypoints.constants import MCP_PREFIX
from vllm.entrypoints.responses_utils import (
from vllm.entrypoints.openai.responses.utils import (
_construct_single_message_from_response_item,
_maybe_combine_reasoning_and_tool_call,
construct_chat_messages_with_tool_call,

View File

@@ -8,7 +8,7 @@ from unittest.mock import Mock
import pytest
from vllm.entrypoints.tool_server import ToolServer
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.reasoning.gptoss_reasoning_parser import (
GptOssReasoningParser,
from_builtin_tool_to_tag,

View File

@@ -0,0 +1,2 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

View File

@@ -15,7 +15,7 @@ from vllm.utils import random_uuid
if TYPE_CHECKING:
# Avoid circular import.
from vllm.entrypoints.context import ConversationContext
from vllm.entrypoints.openai.responses.context import ConversationContext
logger = init_logger(__name__)
@@ -81,7 +81,7 @@ class HarmonyBrowserTool(Tool):
logger.info_once("Browser tool initialized")
async def get_result(self, context: "ConversationContext") -> Any:
from vllm.entrypoints.context import HarmonyContext
from vllm.entrypoints.openai.responses.context import HarmonyContext
assert isinstance(context, HarmonyContext)
last_msg = context.messages[-1]
@@ -141,7 +141,7 @@ class HarmonyPythonTool(Tool):
logger.info_once("Code interpreter tool initialized")
async def get_result(self, context: "ConversationContext") -> Any:
from vllm.entrypoints.context import HarmonyContext
from vllm.entrypoints.openai.responses.context import HarmonyContext
assert isinstance(context, HarmonyContext)
last_msg = context.messages[-1]
@@ -155,7 +155,7 @@ class HarmonyPythonTool(Tool):
This function converts parsable context types to harmony and
back so we can use GPTOSS demo python tool
"""
from vllm.entrypoints.context import ParsableContext
from vllm.entrypoints.openai.responses.context import ParsableContext
assert isinstance(context, ParsableContext)

View File

@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any
from openai_harmony import ToolDescription, ToolNamespaceConfig
from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
from vllm.entrypoints.mcp.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
from vllm.logger import init_logger
logger = init_logger(__name__)

View File

@@ -36,6 +36,7 @@ from vllm.engine.protocol import EngineClient
from vllm.entrypoints.anthropic.serving import AnthropicServingMessages
from vllm.entrypoints.launcher import serve_http
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.mcp.tool_server import DemoToolServer, MCPToolServer, ToolServer
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
@@ -62,7 +63,6 @@ from vllm.entrypoints.serve.elastic_ep.middleware import (
ScalingMiddleware,
)
from vllm.entrypoints.serve.tokenize.serving import OpenAIServingTokenization
from vllm.entrypoints.tool_server import DemoToolServer, MCPToolServer, ToolServer
from vllm.entrypoints.utils import (
cli_env_setup,
log_non_default_args,

View File

@@ -31,12 +31,6 @@ from vllm.entrypoints.chat_utils import (
parse_chat_messages_futures,
resolve_chat_template_content_format,
)
from vllm.entrypoints.context import (
ConversationContext,
HarmonyContext,
ParsableContext,
StreamingHarmonyContext,
)
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionNamedToolChoiceParam,
@@ -54,10 +48,19 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionDefinition,
)
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.openai.responses.context import (
ConversationContext,
HarmonyContext,
ParsableContext,
StreamingHarmonyContext,
)
from vllm.entrypoints.openai.responses.protocol import (
ResponseInputOutputItem,
ResponsesRequest,
)
from vllm.entrypoints.openai.responses.utils import (
construct_input_messages,
)
from vllm.entrypoints.openai.translations.protocol import (
TranscriptionRequest,
TranscriptionResponse,
@@ -85,9 +88,6 @@ from vllm.entrypoints.pooling.score.protocol import (
ScoreResponse,
)
from vllm.entrypoints.renderer import BaseRenderer, CompletionRenderer, RenderConfig
from vllm.entrypoints.responses_utils import (
construct_input_messages,
)
from vllm.entrypoints.serve.disagg.protocol import GenerateRequest, GenerateResponse
from vllm.entrypoints.serve.tokenize.protocol import (
DetokenizeRequest,

View File

@@ -22,6 +22,8 @@ from vllm.entrypoints.chat_utils import (
ChatTemplateContentFormatOption,
)
from vllm.entrypoints.constants import MCP_PREFIX
from vllm.entrypoints.mcp.tool import Tool
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.entrypoints.openai.engine.protocol import (
FunctionCall,
)
@@ -38,9 +40,7 @@ from vllm.entrypoints.openai.responses.protocol import (
ResponseRawMessageAndToken,
ResponsesRequest,
)
from vllm.entrypoints.responses_utils import construct_tool_dicts
from vllm.entrypoints.tool import Tool
from vllm.entrypoints.tool_server import ToolServer
from vllm.entrypoints.openai.responses.utils import construct_tool_dicts
from vllm.outputs import RequestOutput
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers import TokenizerLike

View File

@@ -64,14 +64,8 @@ from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam,
ChatTemplateContentFormatOption,
)
from vllm.entrypoints.context import (
ConversationContext,
HarmonyContext,
ParsableContext,
SimpleContext,
StreamingHarmonyContext,
)
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ErrorResponse,
@@ -94,6 +88,13 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
parse_response_input,
render_for_completion,
)
from vllm.entrypoints.openai.responses.context import (
ConversationContext,
HarmonyContext,
ParsableContext,
SimpleContext,
StreamingHarmonyContext,
)
from vllm.entrypoints.openai.responses.protocol import (
InputTokensDetails,
OutputTokensDetails,
@@ -108,13 +109,12 @@ from vllm.entrypoints.openai.responses.protocol import (
ResponseUsage,
StreamingResponsesResponse,
)
from vllm.entrypoints.responses_utils import (
from vllm.entrypoints.openai.responses.utils import (
construct_input_messages,
construct_tool_dicts,
extract_tool_types,
should_continue_final_message,
)
from vllm.entrypoints.tool_server import ToolServer
from vllm.exceptions import VLLMValidationError
from vllm.inputs.data import TokensPrompt
from vllm.logger import init_logger

View File

@@ -8,7 +8,7 @@ from collections.abc import Callable, Sequence
from functools import cached_property
from typing import TYPE_CHECKING, Any
from vllm.entrypoints.tool_server import ToolServer
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.utils.collection_utils import is_list_of
from vllm.utils.import_utils import import_from_path

View File

@@ -5,12 +5,12 @@ from collections.abc import Sequence
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.entrypoints.openai.parser.harmony_utils import parse_chat_output
from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser