[Voxtral Realtime] Fix engine crash on empty multimodal embeddings (#34862)
Signed-off-by: Tal Nir <tal@nervexneurotech.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -121,3 +121,75 @@ async def test_multi_chunk_streaming(
|
||||
" it sleeps with quite a flow, and everywhere that Mary went,"
|
||||
" the lamb was sure to go."
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_empty_commit_does_not_crash_engine(
    model_name, mary_had_lamb_audio_chunks, rocm_aiter_fa_attention
):
    """Check that an audio-less commit leaves the engine running.

    Regression test for https://github.com/vllm-project/vllm/issues/34532.

    Committing with no preceding ``input_audio_buffer.append`` used to raise
    ``AssertionError: For realtime you must provide a multimodal_embedding
    at every step``, which took down the whole engine process and
    disconnected every connected client.

    The test opens two websocket connections against the same server: the
    first sends only commits, the second streams real audio and must still
    reach ``transcription.done``.
    """

    async def _open_session(ws):
        # Each connection begins with the server's greeting event, after
        # which the client selects the model via session.update.
        greeting = await receive_event(ws, timeout=30.0)
        assert greeting["type"] == "session.created"
        await send_event(ws, {"type": "session.update", "model": model_name})

    server_args = ["--enforce-eager", "--max-model-len", "2048"]
    if model_name.startswith("mistralai"):
        server_args.extend(MISTRAL_FORMAT_ARGS)
    add_attention_backend(server_args, rocm_aiter_fa_attention)

    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        ws_url = _get_websocket_url(remote_server)

        # Connection 1: commit twice without ever appending audio.
        async with websockets.connect(ws_url) as ws:
            await _open_session(ws)

            # Kick off generation with an empty buffer ...
            await send_event(ws, {"type": "input_audio_buffer.commit"})
            # ... and signal end-of-audio right away.
            await send_event(ws, {"type": "input_audio_buffer.commit", "final": True})

            # Any of these replies is acceptable (an error or an empty
            # transcription); what matters is that the engine answers
            # instead of crashing.
            tolerated_types = (
                "error",
                "transcription.done",
                "transcription.delta",
            )
            reply = await receive_event(ws, timeout=30.0)
            assert reply["type"] in tolerated_types

        # Connection 2: a regular transcription, proving the engine is still
        # alive after the empty commit above.
        async with websockets.connect(ws_url) as ws:
            await _open_session(ws)

            await send_event(ws, {"type": "input_audio_buffer.commit"})
            for audio_chunk in mary_had_lamb_audio_chunks:
                await send_event(
                    ws, {"type": "input_audio_buffer.append", "audio": audio_chunk}
                )
            await send_event(ws, {"type": "input_audio_buffer.commit", "final": True})

            # Consume events until the transcription completes; an error
            # event at this point fails the test.
            last_type = None
            while last_type != "transcription.done":
                event = await receive_event(ws, timeout=60.0)
                last_type = event["type"]
                if last_type == "error":
                    pytest.fail(f"Engine error after empty commit: {event}")
            assert last_type == "transcription.done"
|
||||
|
||||
Reference in New Issue
Block a user