[Misc] Lazy import registered processors (#36024)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
@@ -1020,18 +1020,15 @@ _MULTIMODAL_EXAMPLE_MODELS = {
|
||||
min_transformers_version="4.57",
|
||||
),
|
||||
"Qwen3ASRForConditionalGeneration": _HfExamplesInfo(
|
||||
"Qwen/Qwen3-ASR-1.7B",
|
||||
"Qwen/Qwen3-ASR-0.6B",
|
||||
max_model_len=4096,
|
||||
min_transformers_version="4.57",
|
||||
is_available_online=False,
|
||||
),
|
||||
"Qwen3ASRRealtimeGeneration": _HfExamplesInfo(
|
||||
"Qwen/Qwen3-ASR-1.7B",
|
||||
"Qwen/Qwen3-ASR-0.6B",
|
||||
max_model_len=4096,
|
||||
min_transformers_version="4.57",
|
||||
enforce_eager=True,
|
||||
hf_overrides={"architectures": ["Qwen3ASRRealtimeGeneration"]},
|
||||
is_available_online=False,
|
||||
),
|
||||
"RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True),
|
||||
"SkyworkR1VChatModel": _HfExamplesInfo(
|
||||
|
||||
@@ -48,7 +48,6 @@ from vllm.transformers_utils.configs.deepseek_vl2 import (
|
||||
MlpProjectorConfig,
|
||||
VisionEncoderConfig,
|
||||
)
|
||||
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
from vllm.utils.torch_utils import set_default_torch_dtype
|
||||
|
||||
@@ -160,7 +159,7 @@ class DeepseekVL2ProcessingInfo(BaseProcessingInfo):
|
||||
return self.ctx.get_hf_config(DeepseekVLV2Config)
|
||||
|
||||
def get_hf_processor(self, **kwargs: object):
|
||||
return self.ctx.get_hf_processor(DeepseekVLV2Processor, **kwargs)
|
||||
return self.ctx.get_hf_processor(**kwargs)
|
||||
|
||||
def get_supported_mm_limits(self) -> Mapping[str, int | None]:
|
||||
return {"image": None}
|
||||
|
||||
@@ -41,7 +41,7 @@ from vllm.multimodal.processing import (
|
||||
PromptUpdateDetails,
|
||||
)
|
||||
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||
from vllm.transformers_utils.processors.fireredasr2_processor import (
|
||||
from vllm.transformers_utils.processors.fireredasr2 import (
|
||||
FireRedASR2FeatureExtractor,
|
||||
)
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
@@ -50,7 +50,7 @@ from vllm.multimodal.processing import (
|
||||
PromptUpdate,
|
||||
)
|
||||
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||
from vllm.transformers_utils.processors.funasr_processor import FunASRFeatureExtractor
|
||||
from vllm.transformers_utils.processors.funasr import FunASRFeatureExtractor
|
||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||
|
||||
from .interfaces import (
|
||||
|
||||
@@ -20,7 +20,9 @@ from transformers.video_processing_utils import BaseVideoProcessor
|
||||
from typing_extensions import TypeVar
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.transformers_utils import processors
|
||||
from vllm.transformers_utils.gguf_utils import is_gguf
|
||||
from vllm.transformers_utils.repo_utils import get_hf_file_to_dict
|
||||
from vllm.transformers_utils.utils import convert_model_repo_to_path
|
||||
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
|
||||
|
||||
@@ -139,6 +141,22 @@ def _merge_mm_kwargs(
|
||||
return allowed_kwargs
|
||||
|
||||
|
||||
def get_processor_cls_name_from_config(
    processor_name: str,
    revision: str | None = "main",
) -> str | None:
    """Find the processor class name declared in a model's config files.

    The candidate files are consulted in a fixed order:
    ``processor_config.json``, ``preprocessor_config.json`` and finally
    ``tokenizer_config.json``. The search stops at the first file whose
    loaded content is truthy and contains a ``"processor_class"`` key.

    Args:
        processor_name: Model identifier handed to ``get_hf_file_to_dict``.
        revision: Revision forwarded to ``get_hf_file_to_dict``.

    Returns:
        The value stored under ``"processor_class"`` in the first matching
        file, or ``None`` when no candidate file provides that key.
    """
    candidate_files = (
        "processor_config.json",
        "preprocessor_config.json",
        "tokenizer_config.json",
    )
    for filename in candidate_files:
        loaded = get_hf_file_to_dict(filename, processor_name, revision=revision)
        # Skip files that did not load to anything usable or that do not
        # name a processor class; a later candidate may still do so.
        if not loaded or "processor_class" not in loaded:
            continue
        return loaded["processor_class"]
    return None
|
||||
|
||||
|
||||
def get_processor(
|
||||
processor_name: str,
|
||||
*args: Any,
|
||||
@@ -152,8 +170,20 @@ def get_processor(
|
||||
revision = "main"
|
||||
try:
|
||||
processor_name = convert_model_repo_to_path(processor_name)
|
||||
registered_cls_name = get_processor_cls_name_from_config(
|
||||
processor_name, revision=revision
|
||||
)
|
||||
registered_processor_cls = (
|
||||
getattr(processors, registered_cls_name, None)
|
||||
if registered_cls_name
|
||||
else None
|
||||
)
|
||||
registered_processor_cls = cast(type[_P] | None, registered_processor_cls)
|
||||
# Use registered processor class when it's available
|
||||
# and explicit processor_cls is not set.
|
||||
if isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin:
|
||||
processor = AutoProcessor.from_pretrained(
|
||||
_processor_cls = registered_processor_cls or AutoProcessor
|
||||
processor = _processor_cls.from_pretrained(
|
||||
processor_name,
|
||||
*args,
|
||||
revision=revision,
|
||||
|
||||
@@ -8,16 +8,20 @@ reasons:
|
||||
- There is a need to override the existing processor to support vLLM.
|
||||
"""
|
||||
|
||||
from vllm.transformers_utils.processors.bagel import BagelProcessor
|
||||
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
|
||||
from vllm.transformers_utils.processors.fireredasr2_processor import (
|
||||
FireRedASR2Processor,
|
||||
)
|
||||
from vllm.transformers_utils.processors.funasr_processor import FunASRProcessor
|
||||
from vllm.transformers_utils.processors.hunyuan_vl import HunYuanVLProcessor
|
||||
from vllm.transformers_utils.processors.hunyuan_vl_image import HunYuanVLImageProcessor
|
||||
from vllm.transformers_utils.processors.ovis import OvisProcessor
|
||||
from vllm.transformers_utils.processors.ovis2_5 import Ovis2_5Processor
|
||||
import importlib
|
||||
|
||||
# Maps each lazily exported processor class name to the module that defines
# it. The module-level ``__getattr__`` hook in this file reads this table so
# that a processor module is imported only when its class is first requested.
_CLASS_TO_MODULE: dict[str, str] = {
    "BagelProcessor": "vllm.transformers_utils.processors.bagel",
    "DeepseekVLV2Processor": "vllm.transformers_utils.processors.deepseek_vl2",
    "FireRedASR2Processor": "vllm.transformers_utils.processors.fireredasr2",
    "FunASRProcessor": "vllm.transformers_utils.processors.funasr",
    "HunYuanVLProcessor": "vllm.transformers_utils.processors.hunyuan_vl",
    "HunYuanVLImageProcessor": "vllm.transformers_utils.processors.hunyuan_vl_image",
    "OvisProcessor": "vllm.transformers_utils.processors.ovis",
    "Ovis2_5Processor": "vllm.transformers_utils.processors.ovis2_5",
    "Qwen3ASRProcessor": "vllm.transformers_utils.processors.qwen3_asr",
}
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BagelProcessor",
|
||||
@@ -28,4 +32,18 @@ __all__ = [
|
||||
"HunYuanVLImageProcessor",
|
||||
"OvisProcessor",
|
||||
"Ovis2_5Processor",
|
||||
"Qwen3ASRProcessor",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Resolve a registered processor class lazily on attribute access.

    This is the module-level attribute hook described in PEP 562; Python
    calls it only when ``name`` is not found in the module namespace.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The attribute called ``name`` taken from the module that
        ``_CLASS_TO_MODULE`` maps it to.

    Raises:
        AttributeError: If ``name`` is not a key of ``_CLASS_TO_MODULE``.
    """
    module_name = _CLASS_TO_MODULE.get(name)
    if module_name is None:
        # Report the real dotted module path instead of a hard-coded short
        # name, matching the message format Python itself uses.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    value = getattr(importlib.import_module(module_name), name)
    # Store the resolved object in the module namespace so later lookups are
    # ordinary attribute hits and do not re-enter this hook.
    globals()[name] = value
    return value
|
||||
|
||||
|
||||
def __dir__():
    """Return the names listed in ``__all__`` in sorted order.

    Module-level ``__dir__`` hook (PEP 562), used when ``dir()`` is called
    on this module.
    """
    # sorted() accepts any iterable and always builds a new list, so copying
    # ``__all__`` into a list first is unnecessary.
    return sorted(__all__)
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
# Copyright 2025 Bytedance Ltd. and/or its affiliates.
|
||||
"""BAGEL processor for image and text inputs."""
|
||||
|
||||
from transformers import AutoProcessor
|
||||
from transformers.feature_extraction_utils import BatchFeature
|
||||
from transformers.image_utils import ImageInput
|
||||
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
|
||||
@@ -79,6 +78,3 @@ class BagelProcessor(ProcessorMixin):
|
||||
tokenizer_input_names = self.tokenizer.model_input_names
|
||||
image_processor_input_names = self.image_processor.model_input_names
|
||||
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
|
||||
|
||||
|
||||
AutoProcessor.register("BagelProcessor", BagelProcessor)
|
||||
|
||||
@@ -8,7 +8,7 @@ from typing import Literal
|
||||
import torch
|
||||
import torchvision.transforms as T
|
||||
from PIL import Image, ImageOps
|
||||
from transformers import AutoProcessor, BatchFeature, LlamaTokenizerFast
|
||||
from transformers import BatchFeature, LlamaTokenizerFast
|
||||
from transformers.processing_utils import ProcessorMixin
|
||||
|
||||
# TODO(Isotr0py): change modes for variants
|
||||
@@ -453,6 +453,3 @@ class DeepseekOCRProcessor(ProcessorMixin):
|
||||
num_image_tokens,
|
||||
image_shapes,
|
||||
)
|
||||
|
||||
|
||||
AutoProcessor.register("DeepseekOCRProcessor", DeepseekOCRProcessor)
|
||||
|
||||
@@ -29,7 +29,7 @@ from typing import Any
|
||||
import torch
|
||||
import torchvision.transforms as T
|
||||
from PIL import Image, ImageOps
|
||||
from transformers import AutoProcessor, BatchFeature, LlamaTokenizerFast
|
||||
from transformers import BatchFeature, LlamaTokenizerFast
|
||||
from transformers.processing_utils import ProcessorMixin
|
||||
|
||||
|
||||
@@ -401,6 +401,3 @@ class DeepseekVLV2Processor(ProcessorMixin):
|
||||
images_spatial_crop,
|
||||
num_image_tokens,
|
||||
)
|
||||
|
||||
|
||||
AutoProcessor.register("DeepseekVLV2Processor", DeepseekVLV2Processor)
|
||||
|
||||
@@ -8,7 +8,6 @@ import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import (
|
||||
AutoFeatureExtractor,
|
||||
AutoProcessor,
|
||||
BatchFeature,
|
||||
)
|
||||
from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
|
||||
@@ -345,4 +344,3 @@ class FireRedASR2Processor(ProcessorMixin):
|
||||
AutoFeatureExtractor.register(
|
||||
"FireRedASR2FeatureExtractor", FireRedASR2FeatureExtractor
|
||||
)
|
||||
AutoProcessor.register("FireRedASR2Processor", FireRedASR2Processor)
|
||||
@@ -9,7 +9,6 @@ import torchaudio.compliance.kaldi as kaldi
|
||||
from torch.nn.utils.rnn import pad_sequence
|
||||
from transformers import (
|
||||
AutoFeatureExtractor,
|
||||
AutoProcessor,
|
||||
BatchFeature,
|
||||
)
|
||||
from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
|
||||
@@ -503,4 +502,3 @@ class FunASRProcessor(ProcessorMixin):
|
||||
|
||||
|
||||
AutoFeatureExtractor.register("FunASRFeatureExtractor", FunASRFeatureExtractor)
|
||||
AutoProcessor.register("FunASRProcessor", FunASRProcessor)
|
||||
@@ -5,7 +5,6 @@
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import AutoProcessor
|
||||
from transformers.feature_extraction_utils import BatchFeature
|
||||
from transformers.image_utils import ImageInput
|
||||
from transformers.processing_utils import ProcessorMixin
|
||||
@@ -225,6 +224,3 @@ def split_image_into_patch_blocks(
|
||||
patches = img.reshape(-1, 3, patch_size, patch_size)
|
||||
|
||||
return patches
|
||||
|
||||
|
||||
AutoProcessor.register("HunYuanVLProcessor", HunYuanVLProcessor)
|
||||
|
||||
@@ -26,7 +26,7 @@ from functools import cached_property
|
||||
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import AutoProcessor, BatchFeature
|
||||
from transformers import BatchFeature
|
||||
from transformers.image_utils import ImageInput
|
||||
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
|
||||
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
|
||||
@@ -453,6 +453,3 @@ class OvisProcessor(ProcessorMixin):
|
||||
dict.fromkeys(tokenizer_input_names + image_processor_input_names)
|
||||
)
|
||||
return names_from_processor + ["second_per_grid_ts"]
|
||||
|
||||
|
||||
AutoProcessor.register("OvisProcessor", OvisProcessor)
|
||||
|
||||
@@ -6,7 +6,7 @@ from functools import cached_property
|
||||
import numpy as np
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import AutoProcessor, BatchFeature
|
||||
from transformers import BatchFeature
|
||||
from transformers.image_utils import ImageInput
|
||||
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
|
||||
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
|
||||
@@ -476,6 +476,3 @@ class Ovis2_5Processor(ProcessorMixin):
|
||||
visual_placeholders,
|
||||
torch.tensor([[grid_t, grid_h, grid_w]]),
|
||||
)
|
||||
|
||||
|
||||
AutoProcessor.register("Ovis2_5Processor", Ovis2_5Processor)
|
||||
|
||||
@@ -227,6 +227,3 @@ class Qwen3ASRProcessor(ProcessorMixin):
|
||||
+ ["feature_attention_mask"]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
AutoProcessor.register("Qwen3ASRProcessor", Qwen3ASRProcessor)
|
||||
|
||||
Reference in New Issue
Block a user