[Misc] Refactor tokenizer interface (#29693)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -28,7 +28,7 @@ from vllm.multimodal.processing import (
|
||||
PromptUpdate,
|
||||
PromptUpdateDetails,
|
||||
)
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
|
||||
from .intern_vit import InternVisionModel
|
||||
from .internvl import (
|
||||
@@ -241,7 +241,7 @@ class H2OVLProcessor(BaseInternVLProcessor):
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
*,
|
||||
min_dynamic_patch: int | None = None,
|
||||
max_dynamic_patch: int | None = None,
|
||||
|
||||
@@ -50,7 +50,7 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
from vllm.utils.torch_utils import set_default_torch_num_threads
|
||||
|
||||
@@ -347,7 +347,7 @@ class BaseInternVLProcessor(ABC):
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
*,
|
||||
min_dynamic_patch: int | None = None,
|
||||
max_dynamic_patch: int | None = None,
|
||||
@@ -561,7 +561,7 @@ class InternVLProcessor(BaseInternVLProcessor):
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
*,
|
||||
min_dynamic_patch: int | None = None,
|
||||
max_dynamic_patch: int | None = None,
|
||||
|
||||
@@ -73,9 +73,9 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.configs.radio import RadioConfig
|
||||
from vllm.transformers_utils.tokenizer import (
|
||||
AnyTokenizer,
|
||||
cached_tokenizer_from_config,
|
||||
encode_tokens,
|
||||
)
|
||||
@@ -284,7 +284,7 @@ class BaseNanoNemotronVLProcessor(ABC):
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
*args,
|
||||
max_num_tiles: int | None = None,
|
||||
**kwargs,
|
||||
@@ -434,7 +434,7 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
*,
|
||||
max_num_tiles: int | None = None,
|
||||
min_dynamic_patch: int | None = None,
|
||||
@@ -645,7 +645,7 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
|
||||
tokens_per_frame: list[int],
|
||||
frames_indices: list[int],
|
||||
frame_duration_ms: int,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
img_start_token_ids: list[int],
|
||||
img_end_token_ids: list[int],
|
||||
img_context_token_ids: list[int],
|
||||
@@ -670,7 +670,7 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
|
||||
tokens_per_frame (list[int]): number of tokens per frame
|
||||
frames_indices (list[int]): frame indices
|
||||
frame_duration_ms (int): duration of each frame in milliseconds
|
||||
tokenizer (AnyTokenizer): tokenizer to use for tokenizing frame separators
|
||||
tokenizer (TokenizerLike): tokenizer to use for tokenizing frame separators
|
||||
img_start_token_ids (list[int]): pre-tokenized IMG_START tokens
|
||||
img_end_token_ids (list[int]): pre-tokenized IMG_END tokens
|
||||
img_context_token_ids (list[int]): pre-tokenized IMG_CONTEXT tokens
|
||||
|
||||
@@ -34,8 +34,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.image import convert_image_mode
|
||||
from vllm.multimodal.processing import PromptUpdateDetails
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.processor import cached_image_processor_from_config
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
|
||||
from .interfaces import (
|
||||
MultiModalEmbeddings,
|
||||
@@ -203,7 +203,7 @@ class NemotronVLProcessor(InternVLProcessor):
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
image_processor: BaseImageProcessorFast,
|
||||
*,
|
||||
min_dynamic_patch: int | None = None,
|
||||
|
||||
@@ -31,7 +31,7 @@ from vllm.multimodal.processing import (
|
||||
PromptReplacement,
|
||||
PromptUpdate,
|
||||
)
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
|
||||
from .qwen2_5_vl import (
|
||||
Qwen2_5_VisionTransformer as OpenCUAVisionTransformer,
|
||||
@@ -79,7 +79,7 @@ class OpenCUAProcessor(Qwen2VLProcessor):
|
||||
def __init__(
|
||||
self,
|
||||
vision_config: dict,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
**kwargs,
|
||||
):
|
||||
image_processor = Qwen2VLImageProcessor(**vision_config)
|
||||
|
||||
@@ -59,10 +59,8 @@ from vllm.multimodal.processing import (
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.tokenizer import (
|
||||
MistralTokenizer,
|
||||
cached_tokenizer_from_config,
|
||||
)
|
||||
from vllm.tokenizers import MistralTokenizer
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
|
||||
|
||||
@@ -91,7 +91,7 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .interfaces import (
|
||||
@@ -1533,7 +1533,7 @@ class Tarsier2Processor(Qwen2VLProcessor):
|
||||
def __init__(
|
||||
self,
|
||||
vision_config: dict,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
**kwargs,
|
||||
):
|
||||
self.image_processor = Tarsier2ImageProcessor(**vision_config)
|
||||
|
||||
@@ -47,7 +47,7 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
|
||||
@@ -282,7 +282,7 @@ class SkyworkR1VProcessor:
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
*,
|
||||
min_dynamic_patch: int | None = None,
|
||||
max_dynamic_patch: int | None = None,
|
||||
|
||||
@@ -43,8 +43,8 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.transformers_utils.configs import Step3VisionEncoderConfig
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
|
||||
@@ -321,7 +321,7 @@ class Step3VLProcessor:
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
tokenizer: AnyTokenizer,
|
||||
tokenizer: TokenizerLike,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
|
||||
|
||||
@@ -51,10 +51,8 @@ from vllm.multimodal.processing import (
|
||||
)
|
||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.tokenizer import (
|
||||
MistralTokenizer,
|
||||
cached_tokenizer_from_config,
|
||||
)
|
||||
from vllm.tokenizers import MistralTokenizer
|
||||
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
|
||||
|
||||
from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
|
||||
from .utils import init_vllm_registered_model, maybe_prefix
|
||||
|
||||
Reference in New Issue
Block a user