diff --git a/tests/entrypoints/openai/test_realtime_validation.py b/tests/entrypoints/openai/test_realtime_validation.py index 7f12bcaca..946843e0b 100644 --- a/tests/entrypoints/openai/test_realtime_validation.py +++ b/tests/entrypoints/openai/test_realtime_validation.py @@ -129,5 +129,5 @@ async def test_multi_chunk_streaming( " First words I spoke in the original phonograph." " A little piece of practical poetry. Mary had a little lamb," " it sleeps with quite a flow, and everywhere that Mary went," - " the lamb was sure to go" + " the lamb was sure to go." ) diff --git a/vllm/entrypoints/openai/realtime/connection.py b/vllm/entrypoints/openai/realtime/connection.py index 6b779c720..fe1b0f5f3 100644 --- a/vllm/entrypoints/openai/realtime/connection.py +++ b/vllm/entrypoints/openai/realtime/connection.py @@ -48,7 +48,6 @@ class RealtimeConnection: self.generation_task: asyncio.Task | None = None self._is_connected = False - self._is_input_finished = False self._is_model_validated = False self._max_audio_filesize_mb = envs.VLLM_MAX_AUDIO_CLIP_FILESIZE_MB @@ -145,7 +144,7 @@ class RealtimeConnection: commit_event = InputAudioBufferCommit(**event) # final signals that the audio is finished if commit_event.final: - self._is_input_finished = True + self.audio_queue.put_nowait(None) else: await self.start_generation() else: @@ -239,11 +238,6 @@ class RealtimeConnection: # finish because websocket connection was killed break - if self.audio_queue.empty() and self._is_input_finished: - # finish because client signals that audio input - # is finished - break - usage = UsageInfo( prompt_tokens=prompt_token_ids_len, completion_tokens=completion_tokens_len,