[Chore] Move tokenizer initialization methods (#29793)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
     )
     loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)
 
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
 
    tokenizer = get_tokenizer(
        model_config.tokenizer,
@@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
     # Skip ModelForSequenceClassification in MRO to avoid infinite recursion
     loaded_weights = type(model).__mro__[1].load_weights(model, weights)
 
-    from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.tokenizers import get_tokenizer
 
     tokenizer = get_tokenizer(
         model_config.tokenizer,
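Both weight-loading helpers pick up the same one-line change: `get_tokenizer` is now imported from `vllm.tokenizers` instead of `vllm.transformers_utils.tokenizer`. A minimal sketch of the call site after this commit; the hunks truncate the `get_tokenizer(...)` call after `model_config.tokenizer`, so no further arguments are reproduced or invented here:

    # Sketch only: the relocated helper, as used in the two hunks above.
    from vllm.tokenizers import get_tokenizer  # was: vllm.transformers_utils.tokenizer

    def load_classifier_tokenizer(model_config):
        # The diff opens the call with model_config.tokenizer; the remaining
        # arguments are cut off in the hunk, so only that one is shown.
        return get_tokenizer(model_config.tokenizer)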
@@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
 from vllm.transformers_utils.processors.deepseek_ocr import (
     BASE_SIZE,
@@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
     DeepseekOCRProcessor,
     count_tiles,
 )
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.v1.sample.logits_processor import (
     AdapterLogitsProcessor,
@@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.transformers_utils.configs.deepseek_vl2 import (
     DeepseekVLV2Config,
     MlpProjectorConfig,
     VisionEncoderConfig,
 )
 from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.utils.torch_utils import set_default_torch_dtype
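In both DeepSeek model files (and again in the pooling model further down) the only change is the import's home; call sites are untouched and the import lists stay alphabetized, since `vllm.tokenizers` sorts before `vllm.transformers_utils`. The pattern, shown as a before/after sketch:

    # Import relocation only; behavior is unchanged.
    # Before:
    #   from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
    # After:
    from vllm.tokenizers import cached_tokenizer_from_config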
@@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.processor import cached_get_processor
-from vllm.transformers_utils.tokenizer import cached_get_tokenizer
+from vllm.tokenizers import cached_tokenizer_from_config
+from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .blip2 import Blip2QFormerModel
@@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
         else:
             raise ValueError(f"Unsupported task type {task_type}")
 
-        tokenizer = cached_get_tokenizer(model_config.model)
+        tokenizer = cached_tokenizer_from_config(model_config)
         chat = [dict(role="user", content=user_prompt)]
         prompt = tokenizer.apply_chat_template(
             chat,
@@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
         model_config: ModelConfig,
     ) -> int | None:
         """Get the number of audio tokens for an audio duration in sec."""
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
         hop_length = processor.audio_processor.melspec_kwargs["hop_length"]
         proj_win_size = processor.audio_processor.projector_window_size
         ds_rate = processor.audio_processor.projector_downsample_rate
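The GraniteSpeech hunks show the substantive part of this change: the new helpers take the whole `ModelConfig` rather than a bare model name (`cached_get_tokenizer(model_config.model)` becomes `cached_tokenizer_from_config(model_config)`, and likewise for the processor), so tokenizer- and processor-related settings travel with the config. A sketch of both call sites after the change; the `tokenize=False` kwarg is an assumption, since the hunk truncates the `apply_chat_template` call:

    from vllm.tokenizers import cached_tokenizer_from_config
    from vllm.transformers_utils.processor import cached_processor_from_config

    def build_prompt(model_config, user_prompt: str) -> str:
        # New style: pass the ModelConfig itself, not model_config.model.
        tokenizer = cached_tokenizer_from_config(model_config)
        chat = [dict(role="user", content=user_prompt)]
        # kwarg below is illustrative; the real call is truncated in the hunk.
        return tokenizer.apply_chat_template(chat, tokenize=False)

    def audio_hop_length(model_config) -> int:
        processor = cached_processor_from_config(model_config)
        return processor.audio_processor.melspec_kwargs["hop_length"]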
@@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
 )
 from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.tasks import PoolingTask
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.v1.outputs import PoolerOutput
 from vllm.v1.pool.metadata import PoolingMetadata
@@ -73,12 +73,9 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import (
-    cached_tokenizer_from_config,
-    encode_tokens,
-)
+from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .utils import _merge_multimodal_embeddings
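This hunk makes the split visible: tokenizer types and constructors (`TokenizerLike`, `cached_tokenizer_from_config`) move to `vllm.tokenizers`, while `encode_tokens` stays behind in `vllm.transformers_utils.tokenizer`. A sketch of the resulting usage; the `add_special_tokens` kwarg is an assumption about `encode_tokens`' signature, not something shown in this diff:

    from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
    from vllm.transformers_utils.tokenizer import encode_tokens

    def token_ids_for(model_config, text: str) -> list[int]:
        tokenizer: TokenizerLike = cached_tokenizer_from_config(model_config)
        # encode_tokens wraps tokenizer.encode; the kwarg is an assumption.
        return encode_tokens(tokenizer, text, add_special_tokens=False)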
@@ -59,8 +59,7 @@ from vllm.multimodal.processing import (
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
@@ -51,8 +51,7 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
-from vllm.tokenizers import MistralTokenizer
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
+from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
 
 from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
 from .utils import init_vllm_registered_model, maybe_prefix
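The Mistral-family files (these two hunks) get the same two-into-one consolidation: `MistralTokenizer` and `cached_tokenizer_from_config` now come from a single module, copied verbatim from the added lines above:

    # Before: the two names came from different modules. After:
    from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config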
@@ -48,7 +48,7 @@ from vllm.multimodal.processing import (
     PromptUpdate,
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
-from vllm.transformers_utils.processor import cached_get_processor
+from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.utils.jsontree import json_map_leaves
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.utils.torch_utils import set_default_torch_dtype
@@ -850,7 +850,7 @@ class WhisperForConditionalGeneration(
     def get_speech_to_text_config(
         cls, model_config: ModelConfig, task_type: str
     ) -> SpeechToTextConfig:
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
 
         return SpeechToTextConfig(
             max_audio_clip_s=processor.feature_extractor.chunk_length,
@@ -864,7 +864,7 @@ class WhisperForConditionalGeneration(
         stt_config: SpeechToTextConfig,
         model_config: ModelConfig,
     ) -> int | None:
-        processor = cached_get_processor(model_config.model)
+        processor = cached_processor_from_config(model_config)
         hop_length = processor.feature_extractor.hop_length
         assert hop_length is not None
         # NOTE(NickLucche) user can't pass encoder
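Whisper's two call sites follow the same processor-helper rename as GraniteSpeech: resolve the HF processor from the full `ModelConfig` rather than the model name. A sketch of the resulting lookup; `SpeechToTextConfig` fields beyond the one shown in the hunk are not reproduced:

    from vllm.transformers_utils.processor import cached_processor_from_config

    def max_audio_clip_seconds(model_config) -> float:
        # The HF processor is resolved (and cached) from the full ModelConfig.
        processor = cached_processor_from_config(model_config)
        return processor.feature_extractor.chunk_length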