[Model] Add support for moonshotai/Kimi-Audio-7B-Instruct (#36127)
Signed-off-by: tunglinwood <tunglinwood@gmail.com> Signed-off-by: tunglinwood <tomwu.tunglin@gmail.com> Signed-off-by: tunglinwood <113751333+tunglinwood@users.noreply.github.com>
This commit is contained in:
@@ -201,6 +201,34 @@ def run_granite_speech(question: str, audio_count: int) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
# Kimi-Audio-7B-Instruct
def run_kimi_audio(question: str, audio_count: int) -> ModelRequestData:
    """Kimi-Audio-7B-Instruct for audio transcription and understanding."""
    # When the caller gives no question, default to plain transcription.
    effective_question = question or "Please transcribe the audio"

    # Each audio clip is stood in for by one <|im_kimia_text_blank|>
    # placeholder token, which the model fills with audio features.
    placeholders = "<|im_kimia_text_blank|>" * audio_count

    args = EngineArgs(
        model="moonshotai/Kimi-Audio-7B-Instruct",
        trust_remote_code=True,
        max_model_len=4096,
        max_num_seqs=2,
        limit_mm_per_prompt={"audio": audio_count},
    )

    # 151644 is treated as the EOS token here; stopping on it prevents
    # the generation from repeating itself.
    return ModelRequestData(
        engine_args=args,
        prompt=f"{placeholders}{effective_question}",
        stop_token_ids=[151644],
    )
|
||||
|
||||
|
||||
# MiDashengLM
|
||||
def run_midashenglm(question: str, audio_count: int):
|
||||
model_name = "mispeech/midashenglm-7b"
|
||||
@@ -485,6 +513,7 @@ model_example_map = {
|
||||
"glmasr": run_glmasr,
|
||||
"funaudiochat": run_funaudiochat,
|
||||
"granite_speech": run_granite_speech,
|
||||
"kimi_audio": run_kimi_audio,
|
||||
"midashenglm": run_midashenglm,
|
||||
"minicpmo": run_minicpmo,
|
||||
"phi4_mm": run_phi4mm,
|
||||
|
||||
Reference in New Issue
Block a user