[Misc] Fix input processing for Ultravox (#13871)

This commit is contained in:
Roger Wang
2025-02-25 23:56:34 -08:00
committed by GitHub
parent 5157338ed9
commit 7ca1da020f
3 changed files with 6 additions and 15 deletions

View File

@@ -83,8 +83,8 @@ def _test_processing_correctness(
}
tokenizer_encode_kwargs = {}
if model_config.hf_config.model_type in ("mllama", "whisper"):
# For some encoder-decoder models, tokenizer will always add bos_token
if model_config.hf_config.model_type in ("mllama", "whisper", "ultravox"):
# For some multimodal models, the tokenizer always adds bos_token
# at the beginning of the prompt by default, causing hf_processor to output
# incorrect token ids. So we need to use `add_special_tokens=False` here
# to leave bos_token to be added by the processor.
@@ -172,7 +172,7 @@ def _test_processing_correctness(
"Qwen/Qwen2-VL-2B-Instruct",
"Qwen/Qwen2.5-VL-3B-Instruct",
"Qwen/Qwen2-Audio-7B-Instruct",
"fixie-ai/ultravox-v0_5-llama-3_2-1b",
"fixie-ai/ultravox-v0_4",
"openai/whisper-large-v3",
])
@pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])