diff --git a/examples/online_serving/openai_realtime_client.py b/examples/online_serving/openai_realtime_client.py
index 5aa31e3e5..17335bd23 100644
--- a/examples/online_serving/openai_realtime_client.py
+++ b/examples/online_serving/openai_realtime_client.py
@@ -7,7 +7,7 @@ audio transcription by uploading an audio file.
 Before running this script, you must start the vLLM server with a realtime-capable
 model, for example:
 
-    vllm serve mistralai/Voxtral-Mini-3B-Realtime-2602 --enforce-eager
+    vllm serve mistralai/Voxtral-Mini-4B-Realtime-2602 --enforce-eager
 
 Requirements:
 - vllm with audio support
@@ -126,7 +126,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--model",
         type=str,
-        default="mistralai/Voxtral-Mini-3B-Realtime-2602",
+        default="mistralai/Voxtral-Mini-4B-Realtime-2602",
         help="Model that is served and should be pinged.",
     )
     parser.add_argument(
diff --git a/examples/online_serving/openai_realtime_microphone_client.py b/examples/online_serving/openai_realtime_microphone_client.py
index fc80b1c50..9a48f1466 100644
--- a/examples/online_serving/openai_realtime_microphone_client.py
+++ b/examples/online_serving/openai_realtime_microphone_client.py
@@ -5,7 +5,7 @@ Minimal Gradio demo for real-time speech transcription using the vLLM Realtime API
 
 Start the vLLM server first:
 
-    vllm serve mistralai/Voxtral-Mini-3B-Realtime-2602 --enforce-eager
+    vllm serve mistralai/Voxtral-Mini-4B-Realtime-2602 --enforce-eager
 
 Then run this script:
 
@@ -166,7 +166,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--model",
         type=str,
-        default="mistralai/Voxtral-Mini-3B-Realtime-2602",
+        default="mistralai/Voxtral-Mini-4B-Realtime-2602",
         help="Model that is served and should be pinged.",
    )
     parser.add_argument(
diff --git a/tests/entrypoints/openai/test_realtime_validation.py b/tests/entrypoints/openai/test_realtime_validation.py
index e0868a87d..7f12bcaca 100644
--- a/tests/entrypoints/openai/test_realtime_validation.py
+++ b/tests/entrypoints/openai/test_realtime_validation.py
@@ -24,7 +24,7 @@ MISTRAL_FORMAT_ARGS = [
     "mistral",
 ]
 
-MODEL_NAME = "mistralai/Voxtral-Mini-3B-Realtime-2602"
+MODEL_NAME = "mistralai/Voxtral-Mini-4B-Realtime-2602"
 
 
 def _audio_to_base64_pcm16(path: str, target_sr: int = 16000) -> str:
diff --git a/tests/models/multimodal/generation/test_voxtral_realtime.py b/tests/models/multimodal/generation/test_voxtral_realtime.py
index a8fe162f8..d162f80ff 100644
--- a/tests/models/multimodal/generation/test_voxtral_realtime.py
+++ b/tests/models/multimodal/generation/test_voxtral_realtime.py
@@ -19,7 +19,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.inputs.data import TokensPrompt
 from vllm.v1.engine.async_llm import AsyncLLM, StreamingInput
 
-MODEL_NAME = "mistralai/Voxtral-Mini-3B-Realtime-2602"
+MODEL_NAME = "mistralai/Voxtral-Mini-4B-Realtime-2602"
 ENGINE_CONFIG = dict(
     model=MODEL_NAME,
     max_model_len=8192,