[Model][VLM] Add Qwen2.5-Omni model support (thinker only) (#15130)

Signed-off-by: fyabc <suyang.fy@alibaba-inc.com>
Signed-off-by: Roger Wang <ywang@roblox.com>
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
Co-authored-by: Roger Wang <ywang@roblox.com>
Co-authored-by: Xiong Wang <wangxiongts@163.com>
This commit is contained in:
Yang Fan
2025-04-19 14:14:36 +08:00
committed by GitHub
parent 5c9121203c
commit 2c1bd848a6
23 changed files with 1855 additions and 85 deletions

View File

@@ -111,6 +111,55 @@ def cached_processor_from_config(
)
def get_feature_extractor(
    processor_name: str,
    *args: Any,
    trust_remote_code: bool = False,
    **kwargs: Any,
):
    """Load a feature extractor for the given model name via HuggingFace.

    Args:
        processor_name: Model name or path handed to
            ``AutoFeatureExtractor.from_pretrained``.
        *args: Extra positional arguments forwarded unchanged to
            ``from_pretrained``.
        trust_remote_code: Forwarded to ``from_pretrained``; it also decides
            how a ``ValueError`` raised during loading is reported
            (see Raises).
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``from_pretrained``.

    Returns:
        The loaded extractor, cast to ``FeatureExtractionMixin`` for type
        checkers (``cast`` performs no runtime conversion).

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false. The original error is chained
            as the cause and the message suggests enabling remote code.
        ValueError: Re-raised as-is when ``trust_remote_code`` is true.
    """
    # Keep these imports local to the function: importing transformers at
    # module top level would call torch.cuda.device_count().
    from transformers import AutoFeatureExtractor
    from transformers.feature_extraction_utils import FeatureExtractionMixin

    try:
        extractor = AutoFeatureExtractor.from_pretrained(
            processor_name,
            *args,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as e:
        # Remote code was already allowed, so the hint below would not help;
        # surface the original error untouched.
        if trust_remote_code:
            raise e

        # A ValueError here may mean the extractor class does not exist in
        # transformers or cannot be imported, so point the user at the
        # trust-remote-code switch.
        hint = (
            "Failed to load the feature extractor. If the feature "
            "extractor is a custom extractor not yet available in the "
            "HuggingFace transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI.")
        raise RuntimeError(hint) from e

    return cast(FeatureExtractionMixin, extractor)
# Memoized variant of `get_feature_extractor`. `lru_cache` keys on the call
# arguments, so every positional and keyword argument passed to this wrapper
# must be hashable.
cached_get_feature_extractor = lru_cache(get_feature_extractor)
def cached_feature_extractor_from_config(
    model_config: "ModelConfig",
    **kwargs: Any,
):
    """Return the cached feature extractor for a model configuration.

    Args:
        model_config: Supplies the model identifier (``model``) and the
            ``trust_remote_code`` setting used for loading.
        **kwargs: Extra keyword arguments, combined with the configuration
            through ``_merge_mm_kwargs`` before being forwarded.

    Returns:
        Whatever ``cached_get_feature_extractor`` returns for the resulting
        arguments.
    """
    model_name = model_config.model
    allow_remote_code = model_config.trust_remote_code
    merged_kwargs = _merge_mm_kwargs(model_config, **kwargs)

    return cached_get_feature_extractor(
        model_name,
        trust_remote_code=allow_remote_code,
        **merged_kwargs,
    )
def get_image_processor(
processor_name: str,
*args: Any,