[Frontend] Consolidate tokenizer init code (#26276)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -37,15 +37,13 @@ class Processor:
|
||||
def __init__(
|
||||
self,
|
||||
vllm_config: VllmConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
|
||||
):
|
||||
) -> None:
|
||||
self.vllm_config = vllm_config
|
||||
self.model_config = vllm_config.model_config
|
||||
self.cache_config = vllm_config.cache_config
|
||||
self.lora_config = vllm_config.lora_config
|
||||
self.structured_outputs_config = vllm_config.structured_outputs_config
|
||||
self.tokenizer = tokenizer
|
||||
|
||||
self.generation_config_fields = self.model_config.try_get_generation_config()
|
||||
|
||||
@@ -54,11 +52,18 @@ class Processor:
|
||||
|
||||
self.input_preprocessor = InputPreprocessor(
|
||||
self.model_config,
|
||||
self.tokenizer,
|
||||
mm_registry,
|
||||
mm_processor_cache=self.mm_processor_cache,
|
||||
)
|
||||
|
||||
@property
|
||||
def tokenizer(self) -> Optional[AnyTokenizer]:
|
||||
return self.input_preprocessor.tokenizer
|
||||
|
||||
@tokenizer.setter
|
||||
def tokenizer(self, tokenizer: Optional[AnyTokenizer]) -> None:
|
||||
self.input_preprocessor.tokenizer = tokenizer
|
||||
|
||||
def _validate_logprobs(
|
||||
self,
|
||||
params: SamplingParams,
|
||||
@@ -511,10 +516,8 @@ class Processor:
|
||||
else:
|
||||
raise ValueError(f"The {prompt_type} prompt cannot be empty")
|
||||
|
||||
if self.model_config.skip_tokenizer_init:
|
||||
tokenizer = None
|
||||
else:
|
||||
tokenizer = self.tokenizer
|
||||
tokenizer = self.tokenizer
|
||||
if tokenizer is not None:
|
||||
max_input_id = max(prompt_ids or [], default=0)
|
||||
|
||||
# NOTE: tokenizer.max_token_id is the tokenizer’s vocab size while
|
||||
|
||||
Reference in New Issue
Block a user