[Chore] Move tokenizer initialization methods (#29793)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

Author: Cyrus Leung
Date: 2025-12-02 13:33:37 +08:00
Committed by: GitHub
Parent: e2fbfc955e
Commit: 653591d5e7
51 changed files with 150 additions and 129 deletions
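
In short: tokenizer construction helpers move out of `vllm.transformers_utils.tokenizer` into the new `vllm.tokenizers` package, and the cached helpers are now keyed on the full `ModelConfig` rather than a bare model path. A minimal before/after sketch of the pattern repeated across the hunks below, assuming an existing `vllm.config.ModelConfig` instance named `model_config`:

```python
# Old call sites (removed in this commit):
#   from vllm.transformers_utils.tokenizer import cached_get_tokenizer
#   tokenizer = cached_get_tokenizer(model_config.model)

# New call sites: import from vllm.tokenizers and pass the whole config,
# so the lookup carries the config's tokenizer settings, not just a path.
from vllm.tokenizers import cached_tokenizer_from_config

tokenizer = cached_tokenizer_from_config(model_config)
```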


@@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
     )
     loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
     tokenizer = get_tokenizer(
         model_config.tokenizer,
@@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
     # Skip ModelForSequenceClassification in MRO to avoid infinite recursion
     loaded_weights = type(model).__mro__[1].load_weights(model, weights)
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
     tokenizer = get_tokenizer(
         model_config.tokenizer,
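
Both loaders above use the non-cached `get_tokenizer`; only its import path changes, and it is still called with `model_config.tokenizer` (the rest of the call is truncated in the hunks). A sketch under that assumption:

```python
from vllm.tokenizers import get_tokenizer

# Same call shape as before the move; any further kwargs are elided in the
# hunks above, so none are shown here.
tokenizer = get_tokenizer(model_config.tokenizer)
```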


@@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
 from vllm.transformers_utils.processors.deepseek_ocr import (
     BASE_SIZE,
@@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
     DeepseekOCRProcessor,
     count_tiles,
 )
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.v1.sample.logits_processor import (
     AdapterLogitsProcessor,


@@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.transformers_utils.configs.deepseek_vl2 import (
     DeepseekVLV2Config,
     MlpProjectorConfig,
     VisionEncoderConfig,
 )
 from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.utils.torch_utils import set_default_torch_dtype


@@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.processor import cached_get_processor
-from vllm.transformers_utils.tokenizer import cached_get_tokenizer
+from vllm.tokenizers import cached_tokenizer_from_config
+from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .blip2 import Blip2QFormerModel
@@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
         else:
             raise ValueError(f"Unsupported task type {task_type}")

-        tokenizer = cached_get_tokenizer(model_config.model)
+        tokenizer = cached_tokenizer_from_config(model_config)
         chat = [dict(role="user", content=user_prompt)]
         prompt = tokenizer.apply_chat_template(
             chat,
@@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
         model_config: ModelConfig,
     ) -> int | None:
         """Get the number of audio tokens for an audio duration in sec."""
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
         hop_length = processor.audio_processor.melspec_kwargs["hop_length"]
         proj_win_size = processor.audio_processor.projector_window_size
         ds_rate = processor.audio_processor.projector_downsample_rate
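
The GraniteSpeech hunks show both renames side by side: `cached_get_tokenizer(model_config.model)` becomes `cached_tokenizer_from_config(model_config)`, and `cached_get_processor(model_config.model)` becomes `cached_processor_from_config(model_config)`. A sketch of the new tokenizer path; the `apply_chat_template` kwargs are assumptions, since the original call is truncated above:

```python
from vllm.tokenizers import cached_tokenizer_from_config

def build_prompt(model_config, user_prompt: str) -> str:
    # New-style lookup: keyed on the full ModelConfig, not model_config.model.
    tokenizer = cached_tokenizer_from_config(model_config)
    chat = [dict(role="user", content=user_prompt)]
    # tokenize=False / add_generation_prompt=True are assumed kwargs.
    return tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
```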


@@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
 )
 from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.tasks import PoolingTask
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.v1.outputs import PoolerOutput
 from vllm.v1.pool.metadata import PoolingMetadata


@@ -73,12 +73,9 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import (
-    cached_tokenizer_from_config,
-    encode_tokens,
-)
+from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .utils import _merge_multimodal_embeddings
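
This hunk makes the split explicit: tokenizer types and constructors now live in `vllm.tokenizers`, while token-level helpers such as `encode_tokens` stay behind in `vllm.transformers_utils.tokenizer`. The resulting import shape, with a small usage sketch that assumes `encode_tokens` keeps its `(tokenizer, text)` calling convention:

```python
from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
from vllm.transformers_utils.tokenizer import encode_tokens

def tokenize_prompt(model_config, text: str) -> list[int]:
    # Construction comes from vllm.tokenizers; encoding helpers did not move.
    tokenizer: TokenizerLike = cached_tokenizer_from_config(model_config)
    return encode_tokens(tokenizer, text)
```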


@@ -59,8 +59,7 @@ from vllm.multimodal.processing import (
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP


@@ -51,8 +51,7 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config

 from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
 from .utils import init_vllm_registered_model, maybe_prefix


@@ -48,7 +48,7 @@ from vllm.multimodal.processing import (
     PromptUpdate,
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
-from vllm.transformers_utils.processor import cached_get_processor
+from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.utils.jsontree import json_map_leaves
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.utils.torch_utils import set_default_torch_dtype
@@ -850,7 +850,7 @@ class WhisperForConditionalGeneration(
     def get_speech_to_text_config(
         cls, model_config: ModelConfig, task_type: str
     ) -> SpeechToTextConfig:
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)

         return SpeechToTextConfig(
             max_audio_clip_s=processor.feature_extractor.chunk_length,
@@ -864,7 +864,7 @@ class WhisperForConditionalGeneration(
         stt_config: SpeechToTextConfig,
         model_config: ModelConfig,
     ) -> int | None:
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
         hop_length = processor.feature_extractor.hop_length
         assert hop_length is not None
         # NOTE(NickLucche) user can't pass encoder
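
The same convention change applies to processors throughout: `cached_get_processor(model_config.model)` becomes `cached_processor_from_config(model_config)`. A sketch of a caller in the new style (`hop_length_for` is a hypothetical helper, not from the diff):

```python
from vllm.transformers_utils.processor import cached_processor_from_config

def hop_length_for(model_config) -> int:
    # Mirrors the Whisper hunk above: the cached processor is resolved from
    # the full ModelConfig rather than from the model path alone.
    processor = cached_processor_from_config(model_config)
    hop_length = processor.feature_extractor.hop_length
    assert hop_length is not None
    return hop_length
```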