[Model] Add support for moonshotai/Kimi-Audio-7B-Instruct (#36127)

Signed-off-by: tunglinwood <tunglinwood@gmail.com>
Signed-off-by: tunglinwood <tomwu.tunglin@gmail.com>
Signed-off-by: tunglinwood <113751333+tunglinwood@users.noreply.github.com>
This commit is contained in:
tunglinwood
2026-03-11 12:24:48 +08:00
committed by GitHub
parent a197eda9c3
commit 42fadebecb
14 changed files with 1446 additions and 29 deletions

View File

@@ -198,13 +198,17 @@ def get_text_token_prompts(
mm_counts,
mm_options={},
)
assert isinstance(inputs.prompt, str)
text_prompt = inputs.prompt
token_prompt = tokenizer.encode(
text_prompt,
add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
)
# Some models (e.g., Kimi-Audio) provide the prompt as a list of token IDs
# rather than a string; in that case there is no text prompt to encode.
if isinstance(inputs.prompt, list):
text_prompt = None
token_prompt = inputs.prompt
else:
assert isinstance(inputs.prompt, str)
text_prompt = inputs.prompt
token_prompt = tokenizer.encode(
text_prompt,
add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
)
return text_prompt, token_prompt

View File

@@ -857,6 +857,15 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Kwai-Keye/Keye-VL-1_5-8B",
trust_remote_code=True,
),
"MoonshotKimiaForCausalLM": _HfExamplesInfo(
"moonshotai/Kimi-Audio-7B-Instruct",
tokenizer_mode="kimi_audio",
trust_remote_code=True,
),
"KimiK25ForConditionalGeneration": _HfExamplesInfo(
"moonshotai/Kimi-K2.5",
trust_remote_code=True,
),
"KimiVLForConditionalGeneration": _HfExamplesInfo(
"moonshotai/Kimi-VL-A3B-Instruct",
extras={"thinking": "moonshotai/Kimi-VL-A3B-Thinking"},
@@ -870,10 +879,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
)
},
),
"KimiK25ForConditionalGeneration": _HfExamplesInfo(
"moonshotai/Kimi-K2.5",
trust_remote_code=True,
),
"LightOnOCRForConditionalGeneration": _HfExamplesInfo(
"lightonai/LightOnOCR-1B-1025"
),

View File

@@ -103,6 +103,12 @@ def can_initialize(
"pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
)
if model_arch == "MoonshotKimiaForCausalLM":
pytest.skip(
"Kimi-Audio requires SpeechToTextConfig "
"which is not configured in test environment"
)
if model_arch in ["DeepseekV32ForCausalLM", "GlmMoeDsaForCausalLM"]:
from vllm.platforms import current_platform