diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 09051a37f..791344b4f 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -658,7 +658,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition. | `FunASRForConditionalGeneration` | FunASR | `allendou/Fun-ASR-Nano-2512-vllm`, etc. | | | | `Gemma3nForConditionalGeneration` | Gemma3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | | | `GlmAsrForConditionalGeneration` | GLM-ASR | `zai-org/GLM-ASR-Nano-2512` | ✅︎ | ✅︎ | -| `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-speech-3.3-2b`, `ibm-granite/granite-speech-3.3-8b`, etc. | ✅︎ | ✅︎ | +| `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-4.0-1b-speech`, `ibm-granite/granite-speech-3.3-2b`, etc. | ✅︎ | ✅︎ | | `Qwen3ASRForConditionalGeneration` | Qwen3-ASR | `Qwen/Qwen3-ASR-1.7B`, etc. | | ✅︎ | | `Qwen3OmniMoeThinkerForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct`, etc. | | ✅︎ | | `VoxtralForConditionalGeneration` | Voxtral (Mistral format) | `mistralai/Voxtral-Mini-3B-2507`, `mistralai/Voxtral-Small-24B-2507`, etc. | ✅︎ | ✅︎ | diff --git a/tests/models/multimodal/generation/test_granite_speech.py b/tests/models/multimodal/generation/test_granite_speech.py index f0650d4c2..038a15d05 100644 --- a/tests/models/multimodal/generation/test_granite_speech.py +++ b/tests/models/multimodal/generation/test_granite_speech.py @@ -29,10 +29,13 @@ def vllm_to_hf_output( MODEL_NAME = "ibm-granite/granite-speech-3.3-2b" -# Audio lora co-exists directly in the model directory, but -# currently still needs to be passed directly to vLLM. -audio_lora_path = MODEL_NAME -models = [MODEL_NAME] +MODEL_NAME_4_0 = "ibm-granite/granite-4.0-1b-speech" +# Audio lora co-exists directly in the 3.3 model directory, +# while the 4.0 model has adapters merged into the weights.
+models: dict[str, str | None] = { + MODEL_NAME: MODEL_NAME, + MODEL_NAME_4_0: None, +} @pytest.fixture @@ -60,6 +63,7 @@ def run_test( tensor_parallel_size: int, distributed_executor_backend: str | None = None, attention_config: dict | None = None, + audio_lora_path: str | None = None, ): """Inference result should be the same between hf and vllm. @@ -84,12 +88,14 @@ def run_test( limit_mm_per_prompt={"audio": 1}, tensor_parallel_size=tensor_parallel_size, distributed_executor_backend=distributed_executor_backend, - enable_lora=True, + enable_lora=audio_lora_path is not None, max_lora_rank=64, enforce_eager=True, attention_config=attention_config, ) as vllm_model: - lora_request = LoRARequest("audio", 1, audio_lora_path) + lora_request = ( + LoRARequest("audio", 1, audio_lora_path) if audio_lora_path else None + ) vllm_outputs_per_case = [ vllm_model.generate_greedy_logprobs( prompts, @@ -125,7 +131,7 @@ def run_test( ) -@pytest.mark.parametrize("model", models) +@pytest.mark.parametrize("model,audio_lora_path", models.items()) @pytest.mark.parametrize( "dtype", ["float16"] if current_platform.is_rocm() else ["bfloat16"] ) @@ -138,6 +144,7 @@ def test_models( hf_runner, vllm_runner, model: str, + audio_lora_path: str | None, audio_assets: AudioTestAssets, granite_speech_attention_config, dtype: str, @@ -167,4 +174,5 @@ def test_models( num_logprobs=num_logprobs, tensor_parallel_size=1, attention_config=granite_speech_attention_config, + audio_lora_path=audio_lora_path, ) diff --git a/tests/models/registry.py b/tests/models/registry.py index 0d1e8e348..6ffd5d50a 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -810,7 +810,8 @@ _MULTIMODAL_EXAMPLE_MODELS = { ), "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"), "GraniteSpeechForConditionalGeneration": _HfExamplesInfo( - "ibm-granite/granite-speech-3.3-2b" + "ibm-granite/granite-speech-3.3-2b", + extras={"4.0-1b": "ibm-granite/granite-4.0-1b-speech"}, ), "GLM4VForCausalLM": 
_HfExamplesInfo( "zai-org/glm-4v-9b", diff --git a/vllm/model_executor/models/granite_speech.py b/vllm/model_executor/models/granite_speech.py index 1209f1cbe..b97fc67f1 100644 --- a/vllm/model_executor/models/granite_speech.py +++ b/vllm/model_executor/models/granite_speech.py @@ -75,12 +75,14 @@ from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix # NOTE lang support is based on what is written here: # https://huggingface.co/ibm-granite/granite-speech-3.3-2b +# https://huggingface.co/ibm-granite/granite-4.0-1b-speech # Though this may vary from model to model, and also many langs # work pretty well with zero shot. ISO639_1_SUPPORTED_LANGS = { "en": "English", "fr": "French", "de": "German", + "ja": "Japanese", "pt": "Portuguese", "es": "Spanish", }