[Refactor] [11/N] to simplify the mcp architecture (#32396)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
@@ -7,7 +7,7 @@ import pytest_asyncio
|
||||
from openai import OpenAI
|
||||
from openai_harmony import ToolDescription, ToolNamespaceConfig
|
||||
|
||||
from vllm.entrypoints.tool_server import MCPToolServer
|
||||
from vllm.entrypoints.mcp.tool_server import MCPToolServer
|
||||
|
||||
from ....utils import RemoteOpenAIServer
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.reasoning.gptoss_reasoning_parser import (
|
||||
GptOssReasoningParser,
|
||||
)
|
||||
|
||||
@@ -13,15 +13,15 @@ from openai.types.responses.tool import (
|
||||
Tool,
|
||||
)
|
||||
|
||||
from vllm.entrypoints.context import ConversationContext
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
|
||||
from vllm.entrypoints.openai.responses.context import ConversationContext
|
||||
from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
|
||||
from vllm.entrypoints.openai.responses.serving import (
|
||||
OpenAIServingResponses,
|
||||
_extract_allowed_tools_from_mcp_requests,
|
||||
extract_tool_types,
|
||||
)
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.inputs.data import TokensPrompt
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
from openai_harmony import Author, Message, Role, StreamState, TextContent
|
||||
|
||||
from vllm.entrypoints.context import (
|
||||
from vllm.entrypoints.openai.responses.context import (
|
||||
HarmonyContext,
|
||||
StreamingHarmonyContext,
|
||||
TurnMetrics,
|
||||
@@ -71,7 +71,7 @@ async def generate_mock_outputs(
|
||||
def mock_parser():
|
||||
"""Set up a mock parser for tests."""
|
||||
with patch(
|
||||
"vllm.entrypoints.context.get_streamable_parser_for_assistant"
|
||||
"vllm.entrypoints.openai.responses.context.get_streamable_parser_for_assistant"
|
||||
) as mock_parser_factory:
|
||||
# Create a mock parser object
|
||||
parser = MagicMock()
|
||||
@@ -284,7 +284,7 @@ async def test_negative_tool_tokens_edge_case():
|
||||
"""Test edge case where calculation could result in negative tool
|
||||
tokens. We should log an error and clamp the value to 0."""
|
||||
# Use patch to check if logger.error was called
|
||||
with patch("vllm.entrypoints.context.logger.error") as mock_log:
|
||||
with patch("vllm.entrypoints.openai.responses.context.logger.error") as mock_log:
|
||||
context = HarmonyContext(messages=[], available_tools=["browser"])
|
||||
|
||||
# First turn
|
||||
|
||||
@@ -16,7 +16,7 @@ from openai.types.responses.response_reasoning_item import (
|
||||
)
|
||||
|
||||
from vllm.entrypoints.constants import MCP_PREFIX
|
||||
from vllm.entrypoints.responses_utils import (
|
||||
from vllm.entrypoints.openai.responses.utils import (
|
||||
_construct_single_message_from_response_item,
|
||||
_maybe_combine_reasoning_and_tool_call,
|
||||
construct_chat_messages_with_tool_call,
|
||||
|
||||
@@ -8,7 +8,7 @@ from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.reasoning.gptoss_reasoning_parser import (
|
||||
GptOssReasoningParser,
|
||||
from_builtin_tool_to_tag,
|
||||
|
||||
2
vllm/entrypoints/mcp/__init__.py
Normal file
2
vllm/entrypoints/mcp/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
@@ -15,7 +15,7 @@ from vllm.utils import random_uuid
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# Avoid circular import.
|
||||
from vllm.entrypoints.context import ConversationContext
|
||||
from vllm.entrypoints.openai.responses.context import ConversationContext
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -81,7 +81,7 @@ class HarmonyBrowserTool(Tool):
|
||||
logger.info_once("Browser tool initialized")
|
||||
|
||||
async def get_result(self, context: "ConversationContext") -> Any:
|
||||
from vllm.entrypoints.context import HarmonyContext
|
||||
from vllm.entrypoints.openai.responses.context import HarmonyContext
|
||||
|
||||
assert isinstance(context, HarmonyContext)
|
||||
last_msg = context.messages[-1]
|
||||
@@ -141,7 +141,7 @@ class HarmonyPythonTool(Tool):
|
||||
logger.info_once("Code interpreter tool initialized")
|
||||
|
||||
async def get_result(self, context: "ConversationContext") -> Any:
|
||||
from vllm.entrypoints.context import HarmonyContext
|
||||
from vllm.entrypoints.openai.responses.context import HarmonyContext
|
||||
|
||||
assert isinstance(context, HarmonyContext)
|
||||
last_msg = context.messages[-1]
|
||||
@@ -155,7 +155,7 @@ class HarmonyPythonTool(Tool):
|
||||
This function converts parsable context types to harmony and
|
||||
back so we can use GPTOSS demo python tool
|
||||
"""
|
||||
from vllm.entrypoints.context import ParsableContext
|
||||
from vllm.entrypoints.openai.responses.context import ParsableContext
|
||||
|
||||
assert isinstance(context, ParsableContext)
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any
|
||||
|
||||
from openai_harmony import ToolDescription, ToolNamespaceConfig
|
||||
|
||||
from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
|
||||
from vllm.entrypoints.mcp.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -36,6 +36,7 @@ from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.anthropic.serving import AnthropicServingMessages
|
||||
from vllm.entrypoints.launcher import serve_http
|
||||
from vllm.entrypoints.logger import RequestLogger
|
||||
from vllm.entrypoints.mcp.tool_server import DemoToolServer, MCPToolServer, ToolServer
|
||||
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
|
||||
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
|
||||
from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
|
||||
@@ -62,7 +63,6 @@ from vllm.entrypoints.serve.elastic_ep.middleware import (
|
||||
ScalingMiddleware,
|
||||
)
|
||||
from vllm.entrypoints.serve.tokenize.serving import OpenAIServingTokenization
|
||||
from vllm.entrypoints.tool_server import DemoToolServer, MCPToolServer, ToolServer
|
||||
from vllm.entrypoints.utils import (
|
||||
cli_env_setup,
|
||||
log_non_default_args,
|
||||
|
||||
@@ -31,12 +31,6 @@ from vllm.entrypoints.chat_utils import (
|
||||
parse_chat_messages_futures,
|
||||
resolve_chat_template_content_format,
|
||||
)
|
||||
from vllm.entrypoints.context import (
|
||||
ConversationContext,
|
||||
HarmonyContext,
|
||||
ParsableContext,
|
||||
StreamingHarmonyContext,
|
||||
)
|
||||
from vllm.entrypoints.logger import RequestLogger
|
||||
from vllm.entrypoints.openai.chat_completion.protocol import (
|
||||
ChatCompletionNamedToolChoiceParam,
|
||||
@@ -54,10 +48,19 @@ from vllm.entrypoints.openai.engine.protocol import (
|
||||
FunctionDefinition,
|
||||
)
|
||||
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
|
||||
from vllm.entrypoints.openai.responses.context import (
|
||||
ConversationContext,
|
||||
HarmonyContext,
|
||||
ParsableContext,
|
||||
StreamingHarmonyContext,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponseInputOutputItem,
|
||||
ResponsesRequest,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.utils import (
|
||||
construct_input_messages,
|
||||
)
|
||||
from vllm.entrypoints.openai.translations.protocol import (
|
||||
TranscriptionRequest,
|
||||
TranscriptionResponse,
|
||||
@@ -85,9 +88,6 @@ from vllm.entrypoints.pooling.score.protocol import (
|
||||
ScoreResponse,
|
||||
)
|
||||
from vllm.entrypoints.renderer import BaseRenderer, CompletionRenderer, RenderConfig
|
||||
from vllm.entrypoints.responses_utils import (
|
||||
construct_input_messages,
|
||||
)
|
||||
from vllm.entrypoints.serve.disagg.protocol import GenerateRequest, GenerateResponse
|
||||
from vllm.entrypoints.serve.tokenize.protocol import (
|
||||
DetokenizeRequest,
|
||||
|
||||
@@ -22,6 +22,8 @@ from vllm.entrypoints.chat_utils import (
|
||||
ChatTemplateContentFormatOption,
|
||||
)
|
||||
from vllm.entrypoints.constants import MCP_PREFIX
|
||||
from vllm.entrypoints.mcp.tool import Tool
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
FunctionCall,
|
||||
)
|
||||
@@ -38,9 +40,7 @@ from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponseRawMessageAndToken,
|
||||
ResponsesRequest,
|
||||
)
|
||||
from vllm.entrypoints.responses_utils import construct_tool_dicts
|
||||
from vllm.entrypoints.tool import Tool
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.entrypoints.openai.responses.utils import construct_tool_dicts
|
||||
from vllm.outputs import RequestOutput
|
||||
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
@@ -64,14 +64,8 @@ from vllm.entrypoints.chat_utils import (
|
||||
ChatCompletionMessageParam,
|
||||
ChatTemplateContentFormatOption,
|
||||
)
|
||||
from vllm.entrypoints.context import (
|
||||
ConversationContext,
|
||||
HarmonyContext,
|
||||
ParsableContext,
|
||||
SimpleContext,
|
||||
StreamingHarmonyContext,
|
||||
)
|
||||
from vllm.entrypoints.logger import RequestLogger
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
DeltaMessage,
|
||||
ErrorResponse,
|
||||
@@ -94,6 +88,13 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
parse_response_input,
|
||||
render_for_completion,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.context import (
|
||||
ConversationContext,
|
||||
HarmonyContext,
|
||||
ParsableContext,
|
||||
SimpleContext,
|
||||
StreamingHarmonyContext,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.protocol import (
|
||||
InputTokensDetails,
|
||||
OutputTokensDetails,
|
||||
@@ -108,13 +109,12 @@ from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponseUsage,
|
||||
StreamingResponsesResponse,
|
||||
)
|
||||
from vllm.entrypoints.responses_utils import (
|
||||
from vllm.entrypoints.openai.responses.utils import (
|
||||
construct_input_messages,
|
||||
construct_tool_dicts,
|
||||
extract_tool_types,
|
||||
should_continue_final_message,
|
||||
)
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.exceptions import VLLMValidationError
|
||||
from vllm.inputs.data import TokensPrompt
|
||||
from vllm.logger import init_logger
|
||||
|
||||
@@ -8,7 +8,7 @@ from collections.abc import Callable, Sequence
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils.collection_utils import is_list_of
|
||||
from vllm.utils.import_utils import import_from_path
|
||||
|
||||
@@ -5,12 +5,12 @@ from collections.abc import Sequence
|
||||
|
||||
from transformers import PreTrainedTokenizerBase
|
||||
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.entrypoints.openai.chat_completion.protocol import (
|
||||
ChatCompletionRequest,
|
||||
)
|
||||
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import parse_chat_output
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.logger import init_logger
|
||||
from vllm.reasoning import ReasoningParser
|
||||
|
||||
|
||||
Reference in New Issue
Block a user