Support online use_audio_in_video (#36319)

Signed-off-by: Tianyu Guo <guoty9@mail2.sysu.edu.cn>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Tianyu Guo
2026-03-09 22:16:44 +08:00
committed by GitHub
parent 3ec2115015
commit 5578f2a4d3
10 changed files with 152 additions and 10 deletions

View File

@@ -78,7 +78,11 @@ from vllm.multimodal.parse import (
ModalityDataItems,
MultiModalDataItems,
)
from vllm.multimodal.processing import BaseDummyInputsBuilder
from vllm.multimodal.processing import (
BaseDummyInputsBuilder,
ProcessorInputs,
TimingContext,
)
from vllm.multimodal.processing.processor import (
BaseMultiModalProcessor,
MultiModalPromptUpdates,
@@ -811,6 +815,16 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
),
]
def _cached_apply_hf_processor(
    self,
    inputs: ProcessorInputs,
    timing_ctx: TimingContext,
):
    """Choose between the parent's cached path and the direct HF path.

    Looks up ``"use_audio_in_video"`` in ``inputs.hf_processor_mm_kwargs``
    (treated as ``False`` when the key is absent):

    * falsy  -> defer to the parent class's ``_cached_apply_hf_processor``;
    * truthy -> call ``self._apply_hf_processor`` with the same arguments.

    NOTE(review): judging by the method names, the truthy branch is
    presumably meant to skip the per-item processor cache when audio is
    taken from the video track -- confirm against the base class.

    Args:
        inputs: Processor inputs; only ``hf_processor_mm_kwargs`` is read
            here, the object is otherwise forwarded unchanged.
        timing_ctx: Forwarded unchanged to whichever path is selected.

    Returns:
        Whatever the selected processing method returns.
    """
    audio_in_video = inputs.hf_processor_mm_kwargs.get(
        "use_audio_in_video", False
    )
    if not audio_in_video:
        return super()._cached_apply_hf_processor(inputs, timing_ctx)
    return self._apply_hf_processor(inputs, timing_ctx)
def _apply_hf_processor_main(
self,
prompt: str | list[int],