[Frontend] Consolidate tokenizer init code (#26276)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-10-06 17:34:52 +08:00
parent 77c95f72f7
commit 391612e78b
8 changed files with 46 additions and 70 deletions
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -37,15 +37,13 @@ class Processor:
    def __init__(
        self,
        vllm_config: VllmConfig,
-        tokenizer: AnyTokenizer,
        mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
-    ):
+    ) -> None:
        self.vllm_config = vllm_config
        self.model_config = vllm_config.model_config
        self.cache_config = vllm_config.cache_config
        self.lora_config = vllm_config.lora_config
        self.structured_outputs_config = vllm_config.structured_outputs_config
-        self.tokenizer = tokenizer

        self.generation_config_fields = self.model_config.try_get_generation_config()

@@ -54,11 +52,18 @@ class Processor:

        self.input_preprocessor = InputPreprocessor(
            self.model_config,
-            self.tokenizer,
            mm_registry,
            mm_processor_cache=self.mm_processor_cache,
        )

+    @property
+    def tokenizer(self) -> Optional[AnyTokenizer]:
+        return self.input_preprocessor.tokenizer
+
+    @tokenizer.setter
+    def tokenizer(self, tokenizer: Optional[AnyTokenizer]) -> None:
+        self.input_preprocessor.tokenizer = tokenizer
+
    def _validate_logprobs(
        self,
        params: SamplingParams,
@@ -511,10 +516,8 @@ class Processor:
            else:
                raise ValueError(f"The {prompt_type} prompt cannot be empty")

-        if self.model_config.skip_tokenizer_init:
-            tokenizer = None
-        else:
-            tokenizer = self.tokenizer
+        tokenizer = self.tokenizer
+        if tokenizer is not None:
            max_input_id = max(prompt_ids or [], default=0)

            # NOTE: tokenizer.max_token_id is the tokenizer’s vocab size while