2026-01-30 11:41:29 +01:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import json
|
2026-02-22 04:07:18 -06:00
|
|
|
import warnings
|
2026-01-30 11:41:29 +01:00
|
|
|
|
|
|
|
|
import librosa
|
|
|
|
|
import numpy as np
|
2026-03-17 22:44:19 +08:00
|
|
|
import pybase64 as base64
|
2026-01-30 11:41:29 +01:00
|
|
|
import pytest
|
|
|
|
|
import websockets
|
|
|
|
|
|
2026-03-19 03:19:36 -04:00
|
|
|
from tests.entrypoints.openai.conftest import add_attention_backend
|
|
|
|
|
from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
|
2026-01-30 11:41:29 +01:00
|
|
|
from vllm.assets.audio import AudioAsset
|
|
|
|
|
|
2026-03-25 05:24:33 -05:00
|
|
|
# Increase engine iteration timeout for ROCm where first-use JIT compilation
# can exceed the default 60s, causing a silent deadlock in feed_tokens.
REALTIME_ENV_OVERRIDES = {
    **ROCM_ENV_OVERRIDES,
    # 10 minutes: generous enough for first-use aiter kernel compilation.
    "VLLM_ENGINE_ITERATION_TIMEOUT_S": "600",
}

# Extra CLI flags required to load Mistral-format checkpoints: tokenizer,
# config, and weights all use the "mistral" format. ROCm-specific server
# arguments are appended unconditionally (empty on other platforms).
MISTRAL_FORMAT_ARGS = [
    "--tokenizer_mode",
    "mistral",
    "--config_format",
    "mistral",
    "--load_format",
    "mistral",
] + ROCM_EXTRA_ARGS

# Model under test for every realtime-endpoint test in this module.
MODEL_NAME = "mistralai/Voxtral-Mini-4B-Realtime-2602"
|
2026-01-30 11:41:29 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_websocket_url(server: RemoteOpenAIServer) -> str:
    """Return the ws:// URL of the server's /v1/realtime endpoint.

    Derived from the server's HTTP root URL by swapping the scheme.
    """
    base = server.url_root.replace("http://", "ws://")
    return base + "/v1/realtime"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def receive_event(ws, timeout: float = 60.0) -> dict:
    """Read the next WebSocket message and decode it as a JSON event.

    Propagates a timeout error if nothing arrives within ``timeout`` seconds.
    """
    raw = await asyncio.wait_for(ws.recv(), timeout=timeout)
    return json.loads(raw)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def send_event(ws, event: dict) -> None:
    """Serialize ``event`` as JSON and transmit it over the WebSocket."""
    payload = json.dumps(event)
    await ws.send(payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def mary_had_lamb_audio_chunks() -> list[str]:
    """Return the test audio as base64-encoded PCM16 chunks of ~0.1 s each.

    The asset is resampled to 16 kHz mono, split into 1600-sample chunks,
    converted to 16-bit PCM, and base64-encoded so tests can stream it
    over the realtime WebSocket the way a live client would.
    """
    path = AudioAsset("mary_had_lamb").get_local_path()
    audio, _ = librosa.load(str(path), sr=16000, mono=True)

    # Split into ~0.1 second chunks (1600 samples at 16kHz)
    chunk_size = 1600
    chunks = []
    for i in range(0, len(audio), chunk_size):
        chunk = audio[i : i + chunk_size]
        # Scale float samples (assumed in [-1, 1] after librosa.load) to int16 PCM.
        chunk_int16 = (chunk * 32767).astype(np.int16)
        chunk_bytes = chunk_int16.tobytes()
        chunks.append(base64.b64encode(chunk_bytes).decode("utf-8"))

    return chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_multi_chunk_streaming(
    model_name, mary_had_lamb_audio_chunks, rocm_aiter_fa_attention
):
    """Test streaming multiple audio chunks before committing.

    Flow: open a realtime session, run a single-chunk warm-up request
    (for ROCm JIT compilation), then stream every chunk of the test
    audio, commit, and assert the full transcription matches the
    expected text exactly.
    """
    server_args = ["--enforce-eager", "--max-model-len", "2048"]

    # Mistral-format checkpoints need extra tokenizer/config/load flags.
    if model_name.startswith("mistralai"):
        server_args += MISTRAL_FORMAT_ARGS

    add_attention_backend(server_args, rocm_aiter_fa_attention)

    with RemoteOpenAIServer(
        model_name, server_args, env_dict=REALTIME_ENV_OVERRIDES
    ) as remote_server:
        ws_url = _get_websocket_url(remote_server)

        async with websockets.connect(ws_url) as ws:
            # Receive session.created
            event = await receive_event(ws, timeout=30.0)
            assert event["type"] == "session.created"

            # Bind this session to the model under test.
            await send_event(ws, {"type": "session.update", "model": model_name})

            # Wait for the server to acknowledge the session update.
            # NOTE(review): asyncio.wait_for raises asyncio.TimeoutError,
            # which is only the builtin TimeoutError on Python 3.11+ —
            # confirm the minimum supported Python version.
            try:
                while True:
                    event = await receive_event(ws, timeout=5.0)
                    if event["type"] == "session.updated":
                        break
            except TimeoutError:
                warnings.warn(
                    f"session.updated not received within {5.0}s after "
                    "session.update. The server may not implement this event.",
                    stacklevel=2,
                )

            # (ROCm) Warm-up: send a non-final commit (required to start
            # transcription) with a small audio chunk to trigger aiter
            # compilation on first use.
            await send_event(ws, {"type": "input_audio_buffer.commit"})
            await send_event(
                ws,
                {
                    "type": "input_audio_buffer.append",
                    "audio": mary_had_lamb_audio_chunks[0],
                },
            )
            await send_event(ws, {"type": "input_audio_buffer.commit", "final": True})

            # (ROCm) Drain all warm-up responses with generous timeout for
            # JIT compilation
            warmup_done = False
            while not warmup_done:
                event = await receive_event(ws, timeout=600.0)
                if event["type"] in ("transcription.done", "error"):
                    warmup_done = True

            # Now send the real test audio
            await send_event(ws, {"type": "input_audio_buffer.commit"})

            # Send multiple audio chunks
            for chunk in mary_had_lamb_audio_chunks:
                await send_event(
                    ws, {"type": "input_audio_buffer.append", "audio": chunk}
                )

            # Send commit to end
            await send_event(ws, {"type": "input_audio_buffer.commit", "final": True})

            # Collect transcription deltas until the done event arrives.
            full_text = ""
            done_received = False

            while not done_received:
                event = await receive_event(ws, timeout=60.0)

                if event["type"] == "transcription.delta":
                    full_text += event["delta"]
                elif event["type"] == "transcription.done":
                    done_received = True
                    assert "text" in event
                elif event["type"] == "error":
                    pytest.fail(f"Received error: {event}")

            # Verify transcription contains expected content: the final
            # event's text must equal the concatenation of all deltas.
            assert event["type"] == "transcription.done"
            assert event["text"] == full_text
            assert full_text == (
                " First words I spoke in the original phonograph."
                " A little piece of practical poetry. Mary had a little lamb,"
                " it sleeps with quite a flow, and everywhere that Mary went,"
                " the lamb was sure to go."
            )
|
2026-02-19 02:21:47 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_empty_commit_does_not_crash_engine(
    model_name, mary_had_lamb_audio_chunks, rocm_aiter_fa_attention
):
    """Test that committing without audio does not crash the engine.

    Regression test for https://github.com/vllm-project/vllm/issues/34532.
    An empty commit (no prior input_audio_buffer.append) used to trigger
    ``AssertionError: For realtime you must provide a multimodal_embedding
    at every step`` which killed the entire engine process, disconnecting
    every connected client.
    """
    server_args = ["--enforce-eager", "--max-model-len", "2048"]

    # Mistral-format checkpoints need extra tokenizer/config/load flags.
    if model_name.startswith("mistralai"):
        server_args += MISTRAL_FORMAT_ARGS

    add_attention_backend(server_args, rocm_aiter_fa_attention)

    with RemoteOpenAIServer(
        model_name, server_args, env_dict=REALTIME_ENV_OVERRIDES
    ) as remote_server:
        ws_url = _get_websocket_url(remote_server)

        # --- First connection: empty commit (no audio appended) ----------
        async with websockets.connect(ws_url) as ws:
            event = await receive_event(ws, timeout=30.0)
            assert event["type"] == "session.created"

            await send_event(ws, {"type": "session.update", "model": model_name})

            # Wait for (optional) acknowledgement of the session update.
            try:
                while True:
                    event = await receive_event(ws, timeout=5.0)
                    if event["type"] == "session.updated":
                        break
            except TimeoutError:
                warnings.warn(
                    f"session.updated not received within {5.0}s after "
                    "session.update. The server may not implement this event.",
                    stacklevel=2,
                )

            # Start generation without sending any audio
            await send_event(ws, {"type": "input_audio_buffer.commit"})

            # Immediately signal end-of-audio
            await send_event(ws, {"type": "input_audio_buffer.commit", "final": True})

            # We should get *some* response (error or empty transcription),
            # but the engine must NOT crash.
            # (ROCm) Use generous timeout for first request (aiter JIT compilation)
            event = await receive_event(ws, timeout=360.0)
            assert event["type"] in (
                "error",
                "transcription.done",
                "transcription.delta",
            )

        # --- Second connection: normal transcription ---------------------
        # Verifies the engine is still alive after the empty commit above.
        async with websockets.connect(ws_url) as ws:
            event = await receive_event(ws, timeout=30.0)
            assert event["type"] == "session.created"

            await send_event(ws, {"type": "session.update", "model": model_name})

            # Same optional acknowledgement wait as on the first connection.
            try:
                while True:
                    event = await receive_event(ws, timeout=5.0)
                    if event["type"] == "session.updated":
                        break
            except TimeoutError:
                warnings.warn(
                    f"session.updated not received within {5.0}s after "
                    "session.update. The server may not implement this event.",
                    stacklevel=2,
                )

            # Start transcription
            await send_event(ws, {"type": "input_audio_buffer.commit"})

            # Stream the full test audio this time.
            for chunk in mary_had_lamb_audio_chunks:
                await send_event(
                    ws, {"type": "input_audio_buffer.append", "audio": chunk}
                )

            await send_event(ws, {"type": "input_audio_buffer.commit", "final": True})

            # Drain events until completion; any error means the earlier
            # empty commit damaged the engine.
            done_received = False
            while not done_received:
                event = await receive_event(ws, timeout=60.0)
                if event["type"] == "transcription.done":
                    done_received = True
                elif event["type"] == "error":
                    pytest.fail(f"Engine error after empty commit: {event}")
            assert done_received
|
2026-03-25 05:24:33 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_session_update_invalid_model_returns_error(
    model_name, rocm_aiter_fa_attention
):
    """Test that session.update with an invalid model returns an error."""
    launch_args = ["--enforce-eager", "--max-model-len", "2048"]
    # Mistral-format checkpoints need extra tokenizer/config/load flags.
    if model_name.startswith("mistralai"):
        launch_args.extend(MISTRAL_FORMAT_ARGS)
    add_attention_backend(launch_args, rocm_aiter_fa_attention)

    with RemoteOpenAIServer(
        model_name, launch_args, env_dict=REALTIME_ENV_OVERRIDES
    ) as remote_server:
        endpoint = _get_websocket_url(remote_server)
        async with websockets.connect(endpoint) as ws:
            # Handshake: the server announces the session first.
            created = await receive_event(ws, timeout=30.0)
            assert created["type"] == "session.created"

            # Ask for a model the server does not serve.
            await send_event(
                ws,
                {"type": "session.update", "model": "nonexistent-model"},
            )

            # The rejection must arrive as an error event naming the model.
            reply = await receive_event(ws, timeout=10.0)
            assert reply["type"] == "error"
            assert "nonexistent-model" in reply["error"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_commit_without_session_update_returns_error(
    model_name, rocm_aiter_fa_attention
):
    """Test that committing before validating the model returns an error
    and does not fall through to processing."""
    launch_args = ["--enforce-eager", "--max-model-len", "2048"]
    # Mistral-format checkpoints need extra tokenizer/config/load flags.
    if model_name.startswith("mistralai"):
        launch_args.extend(MISTRAL_FORMAT_ARGS)
    add_attention_backend(launch_args, rocm_aiter_fa_attention)

    with RemoteOpenAIServer(
        model_name, launch_args, env_dict=REALTIME_ENV_OVERRIDES
    ) as remote_server:
        endpoint = _get_websocket_url(remote_server)
        async with websockets.connect(endpoint) as ws:
            # Handshake: the server announces the session first.
            created = await receive_event(ws, timeout=30.0)
            assert created["type"] == "session.created"

            # Skip session.update entirely and go straight to a final commit.
            await send_event(
                ws,
                {"type": "input_audio_buffer.commit", "final": True},
            )

            # The server must refuse with a model_not_validated error code.
            reply = await receive_event(ws, timeout=10.0)
            assert reply["type"] == "error"
            assert "model_not_validated" in reply.get("code", "")
|