diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py
index dd31de840..1502bbff4 100644
--- a/vllm/engine/protocol.py
+++ b/vllm/engine/protocol.py
@@ -11,7 +11,7 @@ from vllm.lora.request import LoRARequest
 from vllm.outputs import PoolingRequestOutput, RequestOutput
 from vllm.plugins.io_processors import IOProcessor
 from vllm.pooling_params import PoolingParams
-from vllm.renderers import RendererLike
+from vllm.renderers import BaseRenderer
 from vllm.sampling_params import SamplingParams
 from vllm.tasks import SupportedTask
 from vllm.v1.engine import EngineCoreRequest
@@ -28,7 +28,7 @@ class EngineClient(ABC):
 
     @property
     @abstractmethod
-    def renderer(self) -> RendererLike: ...
+    def renderer(self) -> BaseRenderer: ...
 
     @property
     @abstractmethod
diff --git a/vllm/renderers/__init__.py b/vllm/renderers/__init__.py
index d8f76dd87..58d9ed70a 100644
--- a/vllm/renderers/__init__.py
+++ b/vllm/renderers/__init__.py
@@ -2,11 +2,11 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from .params import ChatParams, TokenizeParams, merge_kwargs
-from .protocol import RendererLike
+from .protocol import BaseRenderer
 from .registry import RendererRegistry, renderer_from_config
 
 __all__ = [
-    "RendererLike",
+    "BaseRenderer",
     "RendererRegistry",
     "renderer_from_config",
     "ChatParams",
diff --git a/vllm/renderers/deepseek_v32.py b/vllm/renderers/deepseek_v32.py
index 91b95db06..9e6008c55 100644
--- a/vllm/renderers/deepseek_v32.py
+++ b/vllm/renderers/deepseek_v32.py
@@ -15,18 +15,18 @@ from vllm.tokenizers import cached_get_tokenizer
 from vllm.tokenizers.deepseek_v32 import DeepseekV32Tokenizer
 
 from .params import ChatParams
-from .protocol import RendererLike
+from .protocol import BaseRenderer
 
 logger = init_logger(__name__)
 
 
-class DeepseekV32Renderer(RendererLike):
+class DeepseekV32Renderer(BaseRenderer):
     @classmethod
     def from_config(
         cls,
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
-    ) -> "RendererLike":
+    ) -> "BaseRenderer":
         return cls(config, tokenizer_kwargs)
 
     def __init__(
@@ -34,9 +34,7 @@ class DeepseekV32Renderer(RendererLike):
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
     ) -> None:
-        super().__init__()
-
-        self.config = config
+        super().__init__(config)
 
         if config.skip_tokenizer_init:
             tokenizer = None
diff --git a/vllm/renderers/grok2.py b/vllm/renderers/grok2.py
index feefe8f0b..c1b56b397 100644
--- a/vllm/renderers/grok2.py
+++ b/vllm/renderers/grok2.py
@@ -15,18 +15,18 @@ from vllm.tokenizers import cached_get_tokenizer
 from vllm.tokenizers.grok2 import Grok2Tokenizer
 
 from .params import ChatParams
-from .protocol import RendererLike
+from .protocol import BaseRenderer
 
 logger = init_logger(__name__)
 
 
-class Grok2Renderer(RendererLike):
+class Grok2Renderer(BaseRenderer):
     @classmethod
     def from_config(
         cls,
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
-    ) -> "RendererLike":
+    ) -> "BaseRenderer":
         return cls(config, tokenizer_kwargs)
 
     def __init__(
@@ -34,9 +34,7 @@ class Grok2Renderer(RendererLike):
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
     ) -> None:
-        super().__init__()
-
-        self.config = config
+        super().__init__(config)
 
         if config.skip_tokenizer_init:
             tokenizer = None
diff --git a/vllm/renderers/hf.py b/vllm/renderers/hf.py
index eb90c9e6d..079dd0953 100644
--- a/vllm/renderers/hf.py
+++ b/vllm/renderers/hf.py
@@ -34,7 +34,7 @@ from vllm.transformers_utils.processor import cached_get_processor
 from vllm.utils.func_utils import supports_kw
 
 from .params import ChatParams
-from .protocol import RendererLike
+from .protocol import BaseRenderer
 
 if TYPE_CHECKING:
     from vllm.multimodal.inputs import MultiModalDataDict, MultiModalUUIDDict
@@ -584,13 +584,13 @@ def replace_vision_chunk_video_placeholder(
     return prompt_raw
 
 
-class HfRenderer(RendererLike):
+class HfRenderer(BaseRenderer):
     @classmethod
     def from_config(
         cls,
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
-    ) -> "RendererLike":
+    ) -> "BaseRenderer":
         return cls(config, tokenizer_kwargs)
 
     def __init__(
@@ -598,9 +598,8 @@ class HfRenderer(RendererLike):
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
     ) -> None:
-        super().__init__()
+        super().__init__(config)
 
-        self.config = config
         self.use_unified_vision_chunk = getattr(
             config.hf_config, "use_unified_vision_chunk", False
         )
diff --git a/vllm/renderers/mistral.py b/vllm/renderers/mistral.py
index 109690dd7..dcccb09b9 100644
--- a/vllm/renderers/mistral.py
+++ b/vllm/renderers/mistral.py
@@ -17,7 +17,7 @@ from vllm.tokenizers.mistral import MistralTokenizer
 from vllm.utils.async_utils import make_async
 
 from .params import ChatParams
-from .protocol import RendererLike
+from .protocol import BaseRenderer
 
 logger = init_logger(__name__)
 
@@ -49,13 +49,13 @@ def safe_apply_chat_template(
         raise ValueError(str(e)) from e
 
 
-class MistralRenderer(RendererLike):
+class MistralRenderer(BaseRenderer):
     @classmethod
     def from_config(
         cls,
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
-    ) -> "RendererLike":
+    ) -> "BaseRenderer":
         return cls(config, tokenizer_kwargs)
 
     def __init__(
@@ -63,9 +63,7 @@ class MistralRenderer(RendererLike):
         config: ModelConfig,
         tokenizer_kwargs: dict[str, Any],
     ) -> None:
-        super().__init__()
-
-        self.config = config
+        super().__init__(config)
 
         if config.skip_tokenizer_init:
             tokenizer = None
diff --git a/vllm/renderers/protocol.py b/vllm/renderers/protocol.py
index 71d30c3b8..676026965 100644
--- a/vllm/renderers/protocol.py
+++ b/vllm/renderers/protocol.py
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
-from typing import TYPE_CHECKING, Any, Protocol
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any
 
 from vllm.inputs import EmbedsPrompt, TextPrompt, TokensPrompt
 from vllm.tokenizers import TokenizerLike
@@ -19,19 +20,26 @@ if TYPE_CHECKING:
     )
 
 
-class RendererLike(Protocol):
-    config: "ModelConfig"
-    _async_tokenizer: AsyncMicrobatchTokenizer
-
+class BaseRenderer(ABC):
     @classmethod
+    @abstractmethod
     def from_config(
         cls,
         config: "ModelConfig",
         tokenizer_kwargs: dict[str, Any],
-    ) -> "RendererLike":
+    ) -> "BaseRenderer":
         raise NotImplementedError
 
+    def __init__(self, config: "ModelConfig") -> None:
+        super().__init__()
+
+        self.config = config
+
+        # Lazy initialization since offline LLM doesn't use async
+        self._async_tokenizer: AsyncMicrobatchTokenizer | None = None
+
     @property
+    @abstractmethod
     def tokenizer(self) -> TokenizerLike | None:
         raise NotImplementedError
 
@@ -43,8 +51,7 @@
         return tokenizer
 
     def get_async_tokenizer(self) -> AsyncMicrobatchTokenizer:
-        # Lazy initialization since offline LLM doesn't use async
-        if not hasattr(self, "_async_tokenizer"):
+        if self._async_tokenizer is None:
             self._async_tokenizer = AsyncMicrobatchTokenizer(self.get_tokenizer())
 
         return self._async_tokenizer
@@ -104,6 +111,7 @@
     ) -> list[TextPrompt | TokensPrompt | EmbedsPrompt]:
         return self.render_completions(prompt_input, prompt_embeds)
 
+    @abstractmethod
     def render_messages(
         self,
         messages: list["ChatCompletionMessageParam"],
diff --git a/vllm/renderers/registry.py b/vllm/renderers/registry.py
index 5269978b5..dde17a6f9 100644
--- a/vllm/renderers/registry.py
+++ b/vllm/renderers/registry.py
@@ -7,7 +7,7 @@ from vllm.logger import init_logger
 from vllm.tokenizers.registry import tokenizer_args_from_config
 from vllm.utils.import_utils import resolve_obj_by_qualname
 
-from .protocol import RendererLike
+from .protocol import BaseRenderer
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig
@@ -43,7 +43,7 @@ class RendererRegistry:
 
         return None
 
-    def load_renderer_cls(self, renderer_mode: str) -> type[RendererLike]:
+    def load_renderer_cls(self, renderer_mode: str) -> type[BaseRenderer]:
         if renderer_mode not in self.renderers:
             raise ValueError(f"No renderer registered for {renderer_mode=!r}.")
 
@@ -57,7 +57,7 @@
         renderer_mode: str,
         config: "ModelConfig",
         tokenizer_kwargs: dict[str, Any],
-    ) -> RendererLike:
+    ) -> BaseRenderer:
         renderer_cls = self.load_renderer_cls(renderer_mode)
 
         return renderer_cls.from_config(config, tokenizer_kwargs)
diff --git a/vllm/renderers/terratorch.py b/vllm/renderers/terratorch.py
index 23f8eecc2..8753d1f0f 100644
--- a/vllm/renderers/terratorch.py
+++ b/vllm/renderers/terratorch.py
@@ -14,24 +14,22 @@ from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 
 from .params import ChatParams
-from .protocol import RendererLike
+from .protocol import BaseRenderer
 
 logger = init_logger(__name__)
 
 
-class TerratorchRenderer(RendererLike):
+class TerratorchRenderer(BaseRenderer):
     @classmethod
     def from_config(
         cls,
         config: "ModelConfig",
         tokenizer_kwargs: dict[str, Any],
-    ) -> "RendererLike":
+    ) -> "BaseRenderer":
         return cls(config)
 
     def __init__(self, config: ModelConfig) -> None:
-        super().__init__()
-
-        self.config = config
+        super().__init__(config)
 
         if not config.skip_tokenizer_init:
             raise ValueError("Terratorch renderer requires `skip_tokenizer_init=True`")
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index f1788d8fc..d0df49f53 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -24,7 +24,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.outputs import STREAM_FINISHED, PoolingRequestOutput, RequestOutput
 from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
-from vllm.renderers import RendererLike, merge_kwargs
+from vllm.renderers import BaseRenderer, merge_kwargs
 from vllm.sampling_params import RequestOutputKind, SamplingParams
 from vllm.tasks import SupportedTask
 from vllm.tokenizers import TokenizerLike
@@ -844,7 +844,7 @@ class AsyncLLM(EngineClient):
         return self.input_processor.get_tokenizer()
 
     @property
-    def renderer(self) -> RendererLike:
+    def renderer(self) -> BaseRenderer:
         return self.input_processor.renderer
 
     async def is_tracing_enabled(self) -> bool:
diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py
index c3d86e819..5abf8d609 100644
--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -29,7 +29,7 @@ from vllm.multimodal.parse import ModalityDataItems, MultiModalDataItems
 from vllm.multimodal.processing.context import set_request_id
 from vllm.multimodal.utils import argsort_mm_positions
 from vllm.pooling_params import PoolingParams
-from vllm.renderers import RendererLike
+from vllm.renderers import BaseRenderer
 from vllm.sampling_params import _SAMPLING_EPS, SamplingParams
 from vllm.tokenizers import TokenizerLike
 from vllm.tokenizers.mistral import MistralTokenizer
@@ -96,7 +96,7 @@ class InputProcessor:
         return self.input_preprocessor.get_tokenizer()
 
     @property
-    def renderer(self) -> RendererLike:
+    def renderer(self) -> BaseRenderer:
         return self.input_preprocessor.renderer
 
     def _validate_logprobs(
diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py
index 8f72b6178..4872992b6 100644
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -21,7 +21,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.outputs import PoolingRequestOutput, RequestOutput
 from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
-from vllm.renderers import RendererLike
+from vllm.renderers import BaseRenderer
 from vllm.sampling_params import SamplingParams
 from vllm.tasks import SupportedTask
 from vllm.tokenizers import TokenizerLike
@@ -367,7 +367,7 @@ class LLMEngine:
         return self.input_processor.get_tokenizer()
 
     @property
-    def renderer(self) -> RendererLike:
+    def renderer(self) -> BaseRenderer:
         return self.input_processor.renderer
 
     def do_log_stats(self) -> None:
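
For reference, a minimal sketch of a renderer subclass written against the new ABC. `MyRenderer` and its trivial tokenizer handling are hypothetical; the surface it implements (`from_config`, `__init__(config)`, the `tokenizer` property, and the now-abstract `render_messages`) comes from the `vllm/renderers/protocol.py` hunks above. The full `render_messages` signature is truncated in the hunk, so it is loosened to `*args, **kwargs` here rather than guessed at.

```python
# Hypothetical example, not part of this patch: a minimal subclass of
# the new BaseRenderer ABC from vllm/renderers/protocol.py.
from typing import TYPE_CHECKING, Any

from vllm.renderers import BaseRenderer
from vllm.tokenizers import TokenizerLike

if TYPE_CHECKING:
    from vllm.config import ModelConfig


class MyRenderer(BaseRenderer):
    @classmethod
    def from_config(
        cls,
        config: "ModelConfig",
        tokenizer_kwargs: dict[str, Any],
    ) -> "BaseRenderer":
        return cls(config)

    def __init__(self, config: "ModelConfig") -> None:
        # BaseRenderer.__init__ now stores self.config and initializes
        # self._async_tokenizer = None, so subclasses no longer assign
        # self.config themselves.
        super().__init__(config)
        self._tokenizer: TokenizerLike | None = None  # load a real tokenizer here

    @property
    def tokenizer(self) -> TokenizerLike | None:
        # Now an abstract property: every subclass must provide it, even
        # if only to return None when skip_tokenizer_init is set.
        return self._tokenizer

    def render_messages(self, messages, *args, **kwargs):
        # Abstract in BaseRenderer; a real subclass implements chat
        # rendering here with the full signature from protocol.py.
        raise NotImplementedError
```

Moving from a `Protocol` to an ABC trades structural typing for explicit inheritance: a subclass that forgets `tokenizer` or `render_messages` now fails loudly at instantiation rather than at first call, and the shared `__init__` plus the `None`-based lazy check in `get_async_tokenizer` replace the previous `hasattr` probe.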