diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index 5b1890de0..e29c7bda0 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -254,7 +254,7 @@ class MultiModalRegistry:
         observability_config: "ObservabilityConfig | None" = None,
         tokenizer: TokenizerLike | None = None,
     ) -> InputProcessingContext:
-        if tokenizer is None and not model_config.skip_tokenizer_init:
+        if tokenizer is None:
             tokenizer = cached_tokenizer_from_config(model_config)
 
         return InputProcessingContext(
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 454e20ad1..855758d21 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -106,10 +106,7 @@ class AsyncLLM(EngineClient):
                 "enabling logging without default stat loggers."
             )
 
-        if self.model_config.skip_tokenizer_init:
-            tokenizer = None
-        else:
-            tokenizer = cached_tokenizer_from_config(self.model_config)
+        tokenizer = cached_tokenizer_from_config(self.model_config)
 
         self.input_processor = InputProcessor(self.vllm_config, tokenizer)
         self.io_processor = get_io_processor(
diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py
index c02143c72..78eeb70f1 100644
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -84,10 +84,7 @@ class LLMEngine:
         self.dp_group = None
         self.should_execute_dummy_batch = False
 
-        if self.model_config.skip_tokenizer_init:
-            tokenizer = None
-        else:
-            tokenizer = cached_tokenizer_from_config(self.model_config)
+        tokenizer = cached_tokenizer_from_config(self.model_config)
 
         self.input_processor = InputProcessor(self.vllm_config, tokenizer)
         self.io_processor = get_io_processor(
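
The net effect of all three hunks is the same: the skip_tokenizer_init branch is dropped, the tokenizer is always built through cached_tokenizer_from_config, and the engine constructors no longer pass tokenizer=None to InputProcessor. Below is a minimal sketch of the shared pattern; DummyModelConfig, _load_tokenizer, and the string tokenizer stand-in are hypothetical illustrations, not vLLM's actual implementation.

    from functools import lru_cache

    class DummyModelConfig:
        """Hypothetical stand-in for vLLM's ModelConfig."""

        def __init__(self, tokenizer_name: str) -> None:
            self.tokenizer_name = tokenizer_name

    @lru_cache(maxsize=None)
    def _load_tokenizer(name: str) -> str:
        # Stands in for the expensive tokenizer load; caching means
        # repeated engine/registry constructions reuse one instance.
        return f"tokenizer<{name}>"

    def cached_tokenizer_from_config(config: DummyModelConfig) -> str:
        return _load_tokenizer(config.tokenizer_name)

    # Before: callers branched on model_config.skip_tokenizer_init and set
    # tokenizer = None when it was enabled. After: one unconditional call.
    config = DummyModelConfig("my-model")
    tokenizer = cached_tokenizer_from_config(config)
    assert tokenizer is cached_tokenizer_from_config(config)  # cached

Because the loader is cached per config, making the call unconditional at every call site costs nothing after the first construction, which is presumably why the three sites could collapse to one pattern.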