Fix AudioFlamingo3/MusicFlamingo HF parity and RoTE handling (#37643)
Signed-off-by: Lasha <26011196+lashahub@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
43877a620b
commit
e7767eccae
@@ -104,12 +104,22 @@ def run_musicflamingo(question: str, audio_count: int) -> ModelRequestData:
|
||||
enforce_eager=True,
|
||||
)
|
||||
|
||||
# MusicFlamingo uses <sound> token for audio
|
||||
# MusicFlamingo prompt placeholders use <sound>; vLLM's MusicFlamingo
|
||||
# multimodal processor expands each one into <|sound_bos|> + audio tokens +
|
||||
# <|sound_eos|> based on extracted audio feature lengths.
|
||||
audio_placeholder = "<sound>" * audio_count
|
||||
system_prompt = (
|
||||
"You are Music Flamingo, a multimodal assistant for language and music. "
|
||||
"On each turn you receive an audio clip which contains music and optional "
|
||||
"text, you will receive at least one or both; use your world knowledge and "
|
||||
"reasoning to help the user with any task. Interpret the entirety of the "
|
||||
"content any input music--regardlenss of whether the user calls it audio, "
|
||||
"music, or sound."
|
||||
)
|
||||
|
||||
prompt = (
|
||||
"<|im_start|>system\n"
|
||||
"You are a helpful assistant.<|im_end|>\n"
|
||||
f"{system_prompt}<|im_end|>\n"
|
||||
"<|im_start|>user\n"
|
||||
f"{audio_placeholder}{question}<|im_end|>\n"
|
||||
"<|im_start|>assistant\n"
|
||||
|
||||
Reference in New Issue
Block a user