[Model] Define merge_by_field_config MM interface (U-Z) (#26261)

Signed-off-by: Ayush Satyam <ayushsatyam146@gmail.com>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Ayush Satyam
2025-10-07 12:15:49 +05:30
committed by GitHub
parent 4dbdf4a294
commit 5f7e8a916a
4 changed files with 32 additions and 25 deletions

View File

@@ -61,7 +61,7 @@ from vllm.transformers_utils.tokenizer import (
)
from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription
-from .utils import flatten_bn, init_vllm_registered_model, maybe_prefix
+from .utils import init_vllm_registered_model, maybe_prefix
logger = init_logger(__name__)
@@ -337,6 +337,8 @@ class VoxtralMultiModalProcessor(BaseMultiModalProcessor[VoxtralProcessingInfo])
class VoxtralForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription
):
+merge_by_field_config = True
supported_languages = ISO639_1_SUPPORTED_LANGS
packed_modules_mapping = {
@@ -445,7 +447,6 @@ class VoxtralForConditionalGeneration(
f"Incorrect type of audio_arrays. Got type: {type(audio_arrays)}"
)
-audio_arrays = flatten_bn(audio_arrays)
if isinstance(audio_arrays, torch.Tensor):
audio_arrays = list(audio_arrays.unbind(0))
return audio_arrays