[Frontend] Add support for transcriptions and translations to run_batch (#33934)

Signed-off-by: Pooya Davoodi <pooya.davoodi@parasail.io>
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
Pooya Davoodi
2026-02-07 05:24:57 -08:00
committed by GitHub
parent 4df44c16ba
commit 2cb2340f7a
3 changed files with 555 additions and 101 deletions

View File

@@ -7,6 +7,7 @@ import tempfile
import pytest
from vllm.assets.audio import AudioAsset
from vllm.entrypoints.openai.run_batch import BatchRequestOutput
# Default model for the generic run_batch tests — presumably the non-audio
# tests below this chunk; the audio tests override it with Whisper models.
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
@@ -42,6 +43,27 @@ INPUT_RERANK_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/re
# Two chat-completion requests against a reasoning model (Qwen3-0.6B),
# one JSONL request per line.
INPUT_REASONING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "Qwen/Qwen3-0.6B", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Solve this math problem: 2+2=?"}]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "Qwen/Qwen3-0.6B", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "What is the capital of France?"}]}}"""
# This is a valid but minimal audio file for testing
MINIMAL_WAV_BASE64 = "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="

# Single transcription request whose audio is embedded as a base64 data: URL.
# Built by plain concatenation so the JSON braces need no {{ }} escaping.
INPUT_TRANSCRIPTION_BATCH = (
    '{"custom_id": "request-1", "method": "POST", "url": "/v1/audio/transcriptions", '
    '"body": {"model": "openai/whisper-large-v3", "file_url": "data:audio/wav;base64,'
    + MINIMAL_WAV_BASE64
    + '", "response_format": "json"}}\n'
)
# Transcription request that fetches the audio from an HTTP URL instead of a
# data: URL.  Built by plain concatenation so the JSON braces need no escaping.
INPUT_TRANSCRIPTION_HTTP_BATCH = (
    '{"custom_id": "request-1", "method": "POST", "url": "/v1/audio/transcriptions", '
    '"body": {"model": "openai/whisper-large-v3", "file_url": "'
    + AudioAsset("mary_had_lamb").url
    + '", "response_format": "json"}}\n'
)

# Italian -> English translation request against the same audio asset.
INPUT_TRANSLATION_BATCH = (
    '{"custom_id": "request-1", "method": "POST", "url": "/v1/audio/translations", '
    '"body": {"model": "openai/whisper-small", "file_url": "'
    + AudioAsset("mary_had_lamb").url
    + '", "response_format": "text", "language": "it", "to_language": "en", '
    '"temperature": 0.0}}\n'
)
def test_empty_file():
with (
@@ -238,3 +260,121 @@ def test_reasoning_parser():
]
assert reasoning is not None
assert len(reasoning) > 0
def test_transcription():
    """Run ``vllm run-batch`` on a transcription request whose audio is an
    inline base64 ``data:`` URL, and validate every output line is a
    well-formed, successful transcription response."""
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(INPUT_TRANSCRIPTION_BATCH)
        input_file.flush()

        # subprocess.run() waits for the child itself; the previous
        # Popen + communicate() + wait() sequence was redundant because
        # communicate() already waits for process termination.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                "openai/whisper-large-v3",
            ],
        )
        assert proc.returncode == 0, f"{proc=}"

        contents = output_file.read()
        print(f"\n\ncontents: {contents}\n\n")
        for line in contents.strip().split("\n"):
            # Ensure the line parses as a valid batch output record.
            BatchRequestOutput.model_validate_json(line)

            # Ensure the record is a successful transcription response
            # carrying both the text and the usage accounting.
            line_dict = json.loads(line)
            assert isinstance(line_dict, dict)
            assert line_dict["error"] is None
            response_body = line_dict["response"]["body"]
            assert response_body is not None
            assert "text" in response_body
            assert "usage" in response_body
def test_transcription_http_url():
    """Run ``vllm run-batch`` on a transcription request whose audio is
    fetched from an HTTP URL, and validate the transcribed text."""
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(INPUT_TRANSCRIPTION_HTTP_BATCH)
        input_file.flush()

        # subprocess.run() waits for the child itself; the previous
        # Popen + communicate() + wait() sequence was redundant because
        # communicate() already waits for process termination.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                "openai/whisper-large-v3",
            ],
        )
        assert proc.returncode == 0, f"{proc=}"

        contents = output_file.read()
        for line in contents.strip().split("\n"):
            # Ensure the line parses as a valid batch output record.
            BatchRequestOutput.model_validate_json(line)

            # Ensure the record is a successful transcription response.
            line_dict = json.loads(line)
            assert isinstance(line_dict, dict)
            assert line_dict["error"] is None
            response_body = line_dict["response"]["body"]
            assert response_body is not None
            assert "text" in response_body
            assert "usage" in response_body

            # The known asset should transcribe to the nursery rhyme.
            transcription_text = response_body["text"]
            assert "Mary had a little lamb" in transcription_text
def test_translation():
    """Run ``vllm run-batch`` on an audio translation request (Italian ->
    English) and validate every output line is a successful text response."""
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(INPUT_TRANSLATION_BATCH)
        input_file.flush()

        # subprocess.run() waits for the child itself; the previous
        # Popen + communicate() + wait() sequence was redundant because
        # communicate() already waits for process termination.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                "openai/whisper-small",
            ],
        )
        assert proc.returncode == 0, f"{proc=}"

        contents = output_file.read()
        for line in contents.strip().split("\n"):
            # Ensure the line parses as a valid batch output record.
            BatchRequestOutput.model_validate_json(line)

            # Ensure the record is a successful translation response.
            line_dict = json.loads(line)
            assert isinstance(line_dict, dict)
            assert line_dict["error"] is None
            response_body = line_dict["response"]["body"]
            assert response_body is not None
            assert "text" in response_body

            # Case-insensitive check: translated output wording varies, but
            # it should mention the subject of the rhyme.
            translation_text = response_body["text"]
            translation_text_lower = str(translation_text).strip().lower()
            assert "mary" in translation_text_lower or "lamb" in translation_text_lower