[Frontend] Consolidate tokenizer init code (#26276)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-10-06 17:34:52 +08:00
committed by GitHub
parent 77c95f72f7
commit 391612e78b
8 changed files with 46 additions and 70 deletions

View File

@@ -37,15 +37,13 @@ class Processor:
def __init__(
self,
vllm_config: VllmConfig,
tokenizer: AnyTokenizer,
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
):
) -> None:
self.vllm_config = vllm_config
self.model_config = vllm_config.model_config
self.cache_config = vllm_config.cache_config
self.lora_config = vllm_config.lora_config
self.structured_outputs_config = vllm_config.structured_outputs_config
self.tokenizer = tokenizer
self.generation_config_fields = self.model_config.try_get_generation_config()
@@ -54,11 +52,18 @@ class Processor:
self.input_preprocessor = InputPreprocessor(
self.model_config,
self.tokenizer,
mm_registry,
mm_processor_cache=self.mm_processor_cache,
)
@property
def tokenizer(self) -> Optional[AnyTokenizer]:
    """Return the tokenizer stored on ``self.input_preprocessor``.

    The value is read through on every access rather than cached on this
    object; per the annotation it may be ``None``.
    """
    preprocessor = self.input_preprocessor
    return preprocessor.tokenizer

@tokenizer.setter
def tokenizer(self, tokenizer: Optional[AnyTokenizer]) -> None:
    """Store ``tokenizer`` on ``self.input_preprocessor``.

    Args:
        tokenizer: The new tokenizer, or ``None``.
    """
    preprocessor = self.input_preprocessor
    preprocessor.tokenizer = tokenizer
def _validate_logprobs(
self,
params: SamplingParams,
@@ -511,10 +516,8 @@ class Processor:
else:
raise ValueError(f"The {prompt_type} prompt cannot be empty")
if self.model_config.skip_tokenizer_init:
tokenizer = None
else:
tokenizer = self.tokenizer
tokenizer = self.tokenizer
if tokenizer is not None:
max_input_id = max(prompt_ids or [], default=0)
# NOTE: tokenizer.max_token_id is the tokenizer's vocab size while