diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py index 405db1a13..c19910c51 100644 --- a/vllm/entrypoints/openai/engine/serving.py +++ b/vllm/entrypoints/openai/engine/serving.py @@ -4,7 +4,7 @@ import asyncio import contextlib import json import time -from collections.abc import AsyncGenerator, Callable, Mapping +from collections.abc import AsyncGenerator, Mapping from dataclasses import dataclass, field from http import HTTPStatus from typing import Any, ClassVar, Generic, Protocol, TypeAlias, TypeVar @@ -882,7 +882,7 @@ class OpenAIServing: request: ResponsesRequest | ChatCompletionRequest, tokenizer: TokenizerLike | None, enable_auto_tools: bool, - tool_parser_cls: Callable[[TokenizerLike], ToolParser] | None, + tool_parser_cls: type[ToolParser] | None, content: str | None = None, ) -> tuple[list[FunctionCall] | None, str | None]: function_calls = list[FunctionCall]() diff --git a/vllm/entrypoints/openai/parser/responses_parser.py b/vllm/entrypoints/openai/parser/responses_parser.py index b5518f0f1..e3d7c588a 100644 --- a/vllm/entrypoints/openai/parser/responses_parser.py +++ b/vllm/entrypoints/openai/parser/responses_parser.py @@ -39,7 +39,7 @@ class ResponsesParser: reasoning_parser_cls: Callable[[TokenizerLike], ReasoningParser], response_messages: list[ResponseInputOutputItem], request: ResponsesRequest, - tool_parser_cls: Callable[[TokenizerLike], ToolParser] | None, + tool_parser_cls: type[ToolParser] | None, ): self.response_messages: list[ResponseInputOutputItem] = ( # TODO: initial messages may not be properly typed diff --git a/vllm/entrypoints/openai/responses/context.py b/vllm/entrypoints/openai/responses/context.py index a4c55c23c..48360173c 100644 --- a/vllm/entrypoints/openai/responses/context.py +++ b/vllm/entrypoints/openai/responses/context.py @@ -276,7 +276,7 @@ class ParsableContext(ConversationContext): reasoning_parser_cls: Callable[[TokenizerLike], ReasoningParser] | None, request: ResponsesRequest, available_tools: list[str] | None, - tool_parser_cls: Callable[[TokenizerLike], ToolParser] | None, + tool_parser_cls: type[ToolParser] | None, chat_template: str | None, chat_template_content_format: ChatTemplateContentFormatOption, ): diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 71d1945ae..a130d3686 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -602,7 +602,7 @@ class OpenAIServingResponses(OpenAIServing): request: ResponsesRequest, messages: list[ResponseInputOutputItem], tool_dicts: list[dict[str, Any]] | None, - tool_parser: Callable[[TokenizerLike], ToolParser] | None, + tool_parser: type[ToolParser] | None, chat_template: str | None, chat_template_content_format: ChatTemplateContentFormatOption, ): diff --git a/vllm/entrypoints/pooling/base/io_processor.py b/vllm/entrypoints/pooling/base/io_processor.py index 319bf82ff..5b09ffb49 100644 --- a/vllm/entrypoints/pooling/base/io_processor.py +++ b/vllm/entrypoints/pooling/base/io_processor.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from collections.abc import Callable, Sequence +from collections.abc import Sequence from typing import Any, Final from vllm import PoolingRequestOutput, PromptType @@ -21,7 +21,6 @@ from vllm.entrypoints.pooling.typing import ( from vllm.inputs.data import ProcessorInputs, SingletonPrompt from vllm.renderers import BaseRenderer, merge_kwargs from vllm.renderers.inputs.preprocess import parse_model_prompt, prompt_to_seq -from vllm.tokenizers import TokenizerLike from vllm.tool_parsers import ToolParser from vllm.utils.mistral import is_mistral_tokenizer @@ -167,7 +166,7 @@ class PoolingIOProcessor: default_template_content_format: ChatTemplateContentFormatOption, default_template_kwargs: dict[str, Any] | None, tool_dicts: list[dict[str, Any]] | None = None, - tool_parser: Callable[[TokenizerLike], ToolParser] | None = None, + tool_parser: type[ToolParser] | None = None, ) -> tuple[list[ConversationMessage], list[ProcessorInputs]]: renderer = self.renderer diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py index d1c5acad8..a6d2f5040 100644 --- a/vllm/entrypoints/serve/render/serving.py +++ b/vllm/entrypoints/serve/render/serving.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from collections.abc import Callable, Sequence +from collections.abc import Sequence from http import HTTPStatus from typing import Any @@ -45,7 +45,6 @@ from vllm.renderers.inputs.preprocess import ( parse_model_prompt, prompt_to_seq, ) -from vllm.tokenizers import TokenizerLike from vllm.tool_parsers import ToolParser from vllm.utils import random_uuid from vllm.utils.mistral import is_mistral_tokenizer @@ -84,12 +83,10 @@ class OpenAIServingRender: self.trust_request_chat_template = trust_request_chat_template self.enable_auto_tools = enable_auto_tools self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none - self.tool_parser: Callable[[TokenizerLike], ToolParser] | None = ( - ParserManager.get_tool_parser( - tool_parser_name=tool_parser, - enable_auto_tools=enable_auto_tools, - model_name=model_config.model, - ) + self.tool_parser: type[ToolParser] | None = ParserManager.get_tool_parser( + tool_parser_name=tool_parser, + enable_auto_tools=enable_auto_tools, + model_name=model_config.model, ) self.default_chat_template_kwargs: dict[str, Any] = ( default_chat_template_kwargs or {} @@ -499,7 +496,7 @@ class OpenAIServingRender: default_template_content_format: ChatTemplateContentFormatOption, default_template_kwargs: dict[str, Any] | None, tool_dicts: list[dict[str, Any]] | None = None, - tool_parser: Callable[[TokenizerLike], ToolParser] | None = None, + tool_parser: type[ToolParser] | None = None, ) -> tuple[list[ConversationMessage], list[ProcessorInputs]]: """Copied from OpenAIServing._preprocess_chat.""" renderer = self.renderer