diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 791344b4f..e5c85dbe8 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -654,6 +654,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
 
 | Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/parallelism_scaling.md) |
 | ------------ | ------ | ----------------- | -------------------- | ------------------------- |
+| `CohereAsrForConditionalGeneration` | Cohere-Transcribe | `CohereLabs/cohere-transcribe-03-2026` | | |
 | `FireRedASR2ForConditionalGeneration` | FireRedASR2 | `allendou/FireRedASR2-LLM-vllm`, etc. | | |
 | `FunASRForConditionalGeneration` | FunASR | `allendou/Fun-ASR-Nano-2512-vllm`, etc. | | |
 | `Gemma3nForConditionalGeneration` | Gemma3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | |
diff --git a/examples/offline_inference/audio_language.py b/examples/offline_inference/audio_language.py
index f384dc2bb..690aada03 100755
--- a/examples/offline_inference/audio_language.py
+++ b/examples/offline_inference/audio_language.py
@@ -72,8 +72,7 @@ def run_audioflamingo3(question: str, audio_count: int) -> ModelRequestData:
 # CohereASR
 def run_cohere_asr(question: str, audio_count: int) -> ModelRequestData:
     assert audio_count == 1, "CohereASR only supports a single audio input per prompt"
-    # TODO (ekagra): add HF ckpt after asr release
-    model_name = "/host/engines/vllm/audio/2b-release"
+    model_name = "CohereLabs/cohere-transcribe-03-2026"
 
     prompt = (
         "<|startofcontext|><|startoftranscript|>"
diff --git a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
index c4c7b8b7f..194c52eae 100644
--- a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
+++ b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
@@ -19,6 +19,7 @@
 import soundfile
 import torch
 from datasets import load_dataset
 from evaluate import load
+from transformers.models.whisper.english_normalizer import EnglishTextNormalizer
 
 from vllm.tokenizers import get_tokenizer
@@ -33,6 +34,16 @@ def to_bytes(y, sr):
     return buffer
 
 
+# Not all models have a normalizer, so use Whisper's as a standard option.
+normalizer_model_info = HF_EXAMPLE_MODELS.find_hf_info("openai/whisper-large-v3")
+normalizer_tokenizer = get_tokenizer(
+    "openai/whisper-large-v3",
+    tokenizer_mode=normalizer_model_info.tokenizer_mode,
+    trust_remote_code=normalizer_model_info.trust_remote_code,
+)
+normalizer = EnglishTextNormalizer(normalizer_tokenizer.english_spelling_normalizer)
+
+
 async def transcribe_audio(client, tokenizer, y, sr):
     # Send loaded audio directly instead of loading from disk,
     # don't account for that time though
@@ -58,8 +69,8 @@ async def bound_transcribe(sem, client, tokenizer, audio, reference):
     async with sem:
         result = await transcribe_audio(client, tokenizer, *audio)
         # Normalize *english* output/reference for evaluation.
-        out = tokenizer.normalize(result[2])
-        ref = tokenizer.normalize(reference)
+        out = normalizer(result[2])
+        ref = normalizer(reference)
         return result[:2] + (out, ref)
 
 
@@ -156,8 +167,9 @@ def run_evaluation(
     "model_config",
     [
         ("openai/whisper-large-v3", 12.744980),
-        # TODO (ekagra): add HF ckpt after asr release
-        # ("/host/engines/vllm/audio/2b-release", 11.73),
+        # TODO (ekagra): turn on after asr release
+        # CohereASR is used to test the code paths for variable encoder lengths
+        # ("CohereLabs/cohere-transcribe-03-2026", 11.92),
     ],
 )
 # Original dataset is 20GB+ in size, hence we use a pre-filtered slice.
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 6ffd5d50a..feb074f11 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -1128,8 +1128,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
         tokenizer_mode="mistral",
     ),
     # [Encoder-decoder]
-    "CohereASRForConditionalGeneration": _HfExamplesInfo(
-        "/host/engines/vllm/audio/2b-release",
+    "CohereAsrForConditionalGeneration": _HfExamplesInfo(
+        "CohereLabs/cohere-transcribe-03-2026",
         trust_remote_code=True,
         is_available_online=False,  # TODO (ekagra): revert after asr release
     ),
diff --git a/vllm/model_executor/models/cohere_asr.py b/vllm/model_executor/models/cohere_asr.py
index 2f8513823..1cebea56a 100644
--- a/vllm/model_executor/models/cohere_asr.py
+++ b/vllm/model_executor/models/cohere_asr.py
@@ -1988,7 +1988,7 @@ class CohereASRMultiModalProcessor(EncDecMultiModalProcessor[CohereASRProcessing
     info=CohereASRProcessingInfo,
     dummy_inputs=CohereASRDummyInputsBuilder,
 )
-class CohereASRForConditionalGeneration(
+class CohereAsrForConditionalGeneration(
     nn.Module, SupportsTranscription, SupportsMultiModal
 ):
     packed_modules_mapping = {
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index c3e7edb7d..839aba11c 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -525,9 +525,9 @@ _MULTIMODAL_MODELS = {
     "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"),
     "VoxtralRealtimeGeneration": ("voxtral_realtime", "VoxtralRealtimeGeneration"),
     # [Encoder-decoder]
-    "CohereASRForConditionalGeneration": (
+    "CohereAsrForConditionalGeneration": (
         "cohere_asr",
-        "CohereASRForConditionalGeneration",
+        "CohereAsrForConditionalGeneration",
     ),
     "NemotronParseForConditionalGeneration": (
         "nemotron_parse",
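
Note on the normalization change in `test_transcription_api_correctness.py`: swapping `tokenizer.normalize(...)` for a standalone `EnglishTextNormalizer` decouples text normalization from the model under test, so checkpoints without a built-in normalizer (such as CohereASR) can be scored with the same English rules as Whisper, keeping WER numbers comparable across models. Below is a minimal standalone sketch of that pattern, not the test's exact code; it assumes `transformers` and `evaluate` are installed and the Whisper checkpoint is reachable on the Hugging Face Hub, and the sample strings are invented for illustration.

```python
from evaluate import load
from transformers import WhisperTokenizer
from transformers.models.whisper.english_normalizer import EnglishTextNormalizer

# WhisperTokenizer.normalize() wraps this same class; constructing it
# directly makes the normalizer independent of the model being evaluated.
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-large-v3")
normalizer = EnglishTextNormalizer(tokenizer.english_spelling_normalizer)

# Case and punctuation differences are removed before scoring.
hypothesis = normalizer("Mister Quilter is the Apostle of the middle classes!")
reference = normalizer("mister quilter is the apostle of the middle classes")
assert hypothesis == reference

# The correctness test then scores normalized strings with evaluate's
# "wer" metric against the expected values in `model_config`.
wer = load("wer")
print(wer.compute(predictions=[hypothesis], references=[reference]))  # 0.0
```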