[Doc] Show that use_audio_in_video is supported in docs (#30837)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -767,9 +767,6 @@ Some models are supported only via the [Transformers modeling backend](#transfor
|
||||
The official `openbmb/MiniCPM-V-2` doesn't work yet, so we need to use a fork (`HwwwH/MiniCPM-V-2`) for now.
|
||||
For more details, please see: <https://github.com/vllm-project/vllm/pull/4087#issuecomment-2250397630>
|
||||
|
||||
!!! note
|
||||
For Qwen2.5-Omni and Qwen3-Omni, reading audio from video pre-processing (`--mm-processor-kwargs '{"use_audio_in_video": true}'`) is currently work in progress and not yet supported.
|
||||
|
||||
#### Transcription
|
||||
|
||||
Speech2Text models trained specifically for Automatic Speech Recognition.
|
||||
|
||||
@@ -10,7 +10,6 @@ python examples/offline_inference/qwen2_5_omni/only_thinker.py \
|
||||
-q mixed_modalities
|
||||
|
||||
# Read vision and audio inputs from a single video file
|
||||
# NOTE: V1 engine does not support interleaved modalities yet.
|
||||
python examples/offline_inference/qwen2_5_omni/only_thinker.py \
|
||||
-q use_audio_in_video
|
||||
|
||||
|
||||
@@ -1128,8 +1128,6 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
|
||||
multimodal_embeddings += tuple(audio_embeddings)
|
||||
return multimodal_embeddings
|
||||
|
||||
# TODO (ywang96): support overlapping modality embeddings so that
|
||||
# `use_audio_in_video` will work on V1.
|
||||
def embed_input_ids(
|
||||
self,
|
||||
input_ids: torch.Tensor,
|
||||
|
||||
@@ -1371,8 +1371,6 @@ class Qwen3OmniMoeThinkerForConditionalGeneration(
|
||||
return inputs_embeds
|
||||
|
||||
deepstack_input_embeds = None
|
||||
# TODO (ywang96): support overlapping modality embeddings so that
|
||||
# `use_audio_in_video` will work on V1.
|
||||
# split the feat dim to obtain multi-scale visual feature
|
||||
has_vision_embeddings = [
|
||||
embeddings.shape[-1] != self.config.text_config.hidden_size
|
||||
|
||||
Reference in New Issue
Block a user