[Model][Multimodal] Add explicit MusicFlamingo adapter (#32696)

Signed-off-by: WangHaoyuuu <mailwhaoyu@gmail.com>
This commit is contained in:
Wang Haoyu
2026-01-30 11:01:29 +08:00
committed by GitHub
parent 133765760b
commit c46b0cd0af
6 changed files with 115 additions and 2 deletions

View File

@@ -70,6 +70,34 @@ def run_audioflamingo3(question: str, audio_count: int) -> ModelRequestData:
)
# MusicFlamingo
def run_musicflamingo(question: str, audio_count: int) -> ModelRequestData:
    """Build the engine args and ChatML prompt for NVIDIA MusicFlamingo.

    Args:
        question: User question appended after the audio placeholders.
        audio_count: Number of audio clips; one ``<sound>`` token is
            emitted per clip and the multimodal limit is set to match.

    Returns:
        A ModelRequestData bundling the EngineArgs and the rendered prompt.
    """
    engine_args = EngineArgs(
        model="nvidia/music-flamingo-2601-hf",
        max_model_len=4096,
        max_num_seqs=2,
        limit_mm_per_prompt={"audio": audio_count},
        enforce_eager=True,
    )

    # MusicFlamingo uses <sound> token for audio
    sound_tokens = "<sound>" * audio_count
    prompt = "".join(
        [
            "<|im_start|>system\n",
            "You are a helpful assistant.<|im_end|>\n",
            "<|im_start|>user\n",
            f"{sound_tokens}{question}<|im_end|>\n",
            "<|im_start|>assistant\n",
        ]
    )

    return ModelRequestData(engine_args=engine_args, prompt=prompt)
# Gemma3N
def run_gemma3n(question: str, audio_count: int) -> ModelRequestData:
model_name = "google/gemma-3n-E2B-it"
@@ -452,6 +480,7 @@ def run_whisper(question: str, audio_count: int) -> ModelRequestData:
model_example_map = {
"audioflamingo3": run_audioflamingo3,
"musicflamingo": run_musicflamingo,
"gemma3n": run_gemma3n,
"glmasr": run_glmasr,
"funaudiochat": run_funaudiochat,