[Model] Add Granite 4.0 1B speech to supported models (#38019)
Signed-off-by: Nick Cao <ncao@redhat.com>
This commit is contained in:
@@ -658,7 +658,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
|
||||
| `FunASRForConditionalGeneration` | FunASR | `allendou/Fun-ASR-Nano-2512-vllm`, etc. | | |
|
||||
| `Gemma3nForConditionalGeneration` | Gemma3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | |
|
||||
| `GlmAsrForConditionalGeneration` | GLM-ASR | `zai-org/GLM-ASR-Nano-2512` | ✅︎ | ✅︎ |
|
||||
| `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-speech-3.3-2b`, `ibm-granite/granite-speech-3.3-8b`, etc. | ✅︎ | ✅︎ |
|
||||
| `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-4.0-1b-speech`, `ibm-granite/granite-speech-3.3-2b`, etc. | ✅︎ | ✅︎ |
|
||||
| `Qwen3ASRForConditionalGeneration` | Qwen3-ASR | `Qwen/Qwen3-ASR-1.7B`, etc. | | ✅︎ |
|
||||
| `Qwen3OmniMoeThinkerForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct`, etc. | | ✅︎ |
|
||||
| `VoxtralForConditionalGeneration` | Voxtral (Mistral format) | `mistralai/Voxtral-Mini-3B-2507`, `mistralai/Voxtral-Small-24B-2507`, etc. | ✅︎ | ✅︎ |
|
||||
|
||||
@@ -29,10 +29,13 @@ def vllm_to_hf_output(
|
||||
|
||||
|
||||
MODEL_NAME = "ibm-granite/granite-speech-3.3-2b"
|
||||
# Audio lora co-exists directly in the model directory, but
|
||||
# currently still needs to be passed directly to vLLM.
|
||||
audio_lora_path = MODEL_NAME
|
||||
models = [MODEL_NAME]
|
||||
MODEL_NAME_4_0 = "ibm-granite/granite-4.0-1b-speech"
|
||||
# The audio LoRA co-exists directly in the 3.3 model directory, whereas
|
||||
# the 4.0 model has adapters merged into the weights.
|
||||
models: dict[str, str | None] = {
|
||||
MODEL_NAME: MODEL_NAME,
|
||||
MODEL_NAME_4_0: None,
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -60,6 +63,7 @@ def run_test(
|
||||
tensor_parallel_size: int,
|
||||
distributed_executor_backend: str | None = None,
|
||||
attention_config: dict | None = None,
|
||||
audio_lora_path: str | None = None,
|
||||
):
|
||||
"""Inference result should be the same between hf and vllm.
|
||||
|
||||
@@ -84,12 +88,14 @@ def run_test(
|
||||
limit_mm_per_prompt={"audio": 1},
|
||||
tensor_parallel_size=tensor_parallel_size,
|
||||
distributed_executor_backend=distributed_executor_backend,
|
||||
enable_lora=True,
|
||||
enable_lora=audio_lora_path is not None,
|
||||
max_lora_rank=64,
|
||||
enforce_eager=True,
|
||||
attention_config=attention_config,
|
||||
) as vllm_model:
|
||||
lora_request = LoRARequest("audio", 1, audio_lora_path)
|
||||
lora_request = (
|
||||
LoRARequest("audio", 1, audio_lora_path) if audio_lora_path else None
|
||||
)
|
||||
vllm_outputs_per_case = [
|
||||
vllm_model.generate_greedy_logprobs(
|
||||
prompts,
|
||||
@@ -125,7 +131,7 @@ def run_test(
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", models)
|
||||
@pytest.mark.parametrize("model,audio_lora_path", models.items())
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", ["float16"] if current_platform.is_rocm() else ["bfloat16"]
|
||||
)
|
||||
@@ -138,6 +144,7 @@ def test_models(
|
||||
hf_runner,
|
||||
vllm_runner,
|
||||
model: str,
|
||||
audio_lora_path: str | None,
|
||||
audio_assets: AudioTestAssets,
|
||||
granite_speech_attention_config,
|
||||
dtype: str,
|
||||
@@ -167,4 +174,5 @@ def test_models(
|
||||
num_logprobs=num_logprobs,
|
||||
tensor_parallel_size=1,
|
||||
attention_config=granite_speech_attention_config,
|
||||
audio_lora_path=audio_lora_path,
|
||||
)
|
||||
|
||||
@@ -810,7 +810,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
|
||||
),
|
||||
"GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"),
|
||||
"GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
|
||||
"ibm-granite/granite-speech-3.3-2b"
|
||||
"ibm-granite/granite-speech-3.3-2b",
|
||||
extras={"4.0-1b": "ibm-granite/granite-4.0-1b-speech"},
|
||||
),
|
||||
"GLM4VForCausalLM": _HfExamplesInfo(
|
||||
"zai-org/glm-4v-9b",
|
||||
|
||||
@@ -75,12 +75,14 @@ from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix
|
||||
|
||||
# NOTE lang support is based on what is written here:
|
||||
# https://huggingface.co/ibm-granite/granite-speech-3.3-2b
|
||||
# https://huggingface.co/ibm-granite/granite-4.0-1b-speech
|
||||
# Though this may vary from model to model, and also many langs
|
||||
# work pretty well with zero shot.
|
||||
ISO639_1_SUPPORTED_LANGS = {
|
||||
"en": "English",
|
||||
"fr": "French",
|
||||
"de": "German",
|
||||
"ja": "Japanese",
|
||||
"pt": "Portuguese",
|
||||
"es": "Spanish",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user