[Model] Add support for moonshotai/Kimi-Audio-7B-Instruct (#36127)

Signed-off-by: tunglinwood <tunglinwood@gmail.com>
Signed-off-by: tunglinwood <tomwu.tunglin@gmail.com>
Signed-off-by: tunglinwood <113751333+tunglinwood@users.noreply.github.com>
This commit is contained in:
tunglinwood
2026-03-11 12:24:48 +08:00
committed by GitHub
parent a197eda9c3
commit 42fadebecb
14 changed files with 1446 additions and 29 deletions

View File

@@ -201,6 +201,34 @@ def run_granite_speech(question: str, audio_count: int) -> ModelRequestData:
)
# Kimi-Audio-7B-Instruct
def run_kimi_audio(question: str, audio_count: int) -> ModelRequestData:
    """Build a ModelRequestData for moonshotai/Kimi-Audio-7B-Instruct.

    Args:
        question: User text appended after the audio placeholders; when
            empty, a default transcription request is used.
        audio_count: Number of audio inputs attached to the prompt.

    Returns:
        ModelRequestData wiring the engine arguments, the assembled
        prompt, and the stop-token list for this model.
    """
    # Fall back to a transcription request when no question is supplied.
    text = question if question else "Please transcribe the audio"

    # Kimi-Audio uses <|im_kimia_text_blank|> as placeholder for audio features,
    # one placeholder token sequence per attached audio clip.
    placeholders = audio_count * "<|im_kimia_text_blank|>"

    args = EngineArgs(
        model="moonshotai/Kimi-Audio-7B-Instruct",
        trust_remote_code=True,
        max_model_len=4096,
        max_num_seqs=2,
        limit_mm_per_prompt={"audio": audio_count},
    )

    # Stop at EOS token (151644) to prevent repetition.
    return ModelRequestData(
        engine_args=args,
        prompt=f"{placeholders}{text}",
        stop_token_ids=[151644],
    )
# MiDashengLM
def run_midashenglm(question: str, audio_count: int):
model_name = "mispeech/midashenglm-7b"
@@ -485,6 +513,7 @@ model_example_map = {
"glmasr": run_glmasr,
"funaudiochat": run_funaudiochat,
"granite_speech": run_granite_speech,
"kimi_audio": run_kimi_audio,
"midashenglm": run_midashenglm,
"minicpmo": run_minicpmo,
"phi4_mm": run_phi4mm,