[CI] Speed up Whisper tests by reusing server (#22859)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -4,18 +4,21 @@
|
||||
import io
|
||||
# imports for guided decoding tests
|
||||
import json
|
||||
from unittest.mock import patch
|
||||
|
||||
import httpx
|
||||
import librosa
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
import soundfile as sf
|
||||
from openai._base_client import AsyncAPIClient
|
||||
|
||||
from vllm.assets.audio import AudioAsset
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
MODEL_NAME = "openai/whisper-small"
|
||||
SERVER_ARGS = ["--enforce-eager"]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def foscolo():
|
||||
@@ -25,50 +28,23 @@ def foscolo():
|
||||
yield f
|
||||
|
||||
|
||||
# NOTE: (NickLucche) the large-v3-turbo model was not trained on translation!
|
||||
@pytest.mark.asyncio
|
||||
async def test_basic_audio(foscolo):
|
||||
model_name = "openai/whisper-small"
|
||||
server_args = ["--enforce-eager"]
|
||||
with RemoteOpenAIServer(model_name, server_args) as remote_server:
|
||||
client = remote_server.get_async_client()
|
||||
translation = await client.audio.translations.create(
|
||||
model=model_name,
|
||||
file=foscolo,
|
||||
response_format="text",
|
||||
# TODO remove once language detection is implemented
|
||||
extra_body=dict(language="it"),
|
||||
temperature=0.0)
|
||||
out = json.loads(translation)['text'].strip().lower()
|
||||
assert "greek sea" in out
|
||||
@pytest.fixture(scope="module")
|
||||
def server():
|
||||
with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_prompt(foscolo):
|
||||
model_name = "openai/whisper-small"
|
||||
server_args = ["--enforce-eager"]
|
||||
# Condition whisper on starting text
|
||||
prompt = "Nor have I ever"
|
||||
with RemoteOpenAIServer(model_name, server_args) as remote_server:
|
||||
client = remote_server.get_async_client()
|
||||
transcription = await client.audio.translations.create(
|
||||
model=model_name,
|
||||
file=foscolo,
|
||||
prompt=prompt,
|
||||
extra_body=dict(language="it"),
|
||||
response_format="text",
|
||||
temperature=0.0)
|
||||
out = json.loads(transcription)['text']
|
||||
assert "Nor will I ever touch the sacred" not in out
|
||||
assert prompt not in out
|
||||
@pytest_asyncio.fixture
|
||||
async def client(server):
|
||||
async with server.get_async_client() as async_client:
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_asr_model(foscolo):
|
||||
# text to text model
|
||||
model_name = "JackFram/llama-68m"
|
||||
server_args = ["--enforce-eager"]
|
||||
with RemoteOpenAIServer(model_name, server_args) as remote_server:
|
||||
with RemoteOpenAIServer(model_name, SERVER_ARGS) as remote_server:
|
||||
client = remote_server.get_async_client()
|
||||
res = await client.audio.translations.create(model=model_name,
|
||||
file=foscolo,
|
||||
@@ -78,81 +54,117 @@ async def test_non_asr_model(foscolo):
|
||||
assert err["message"] == "The model does not support Translations API"
|
||||
|
||||
|
||||
# NOTE: (NickLucche) the large-v3-turbo model was not trained on translation!
|
||||
@pytest.mark.asyncio
|
||||
async def test_streaming_response(foscolo):
|
||||
model_name = "openai/whisper-small"
|
||||
server_args = ["--enforce-eager"]
|
||||
async def test_basic_audio(foscolo, client):
|
||||
translation = await client.audio.translations.create(
|
||||
model=MODEL_NAME,
|
||||
file=foscolo,
|
||||
response_format="text",
|
||||
# TODO remove once language detection is implemented
|
||||
extra_body=dict(language="it"),
|
||||
temperature=0.0)
|
||||
out = json.loads(translation)['text'].strip().lower()
|
||||
assert "greek sea" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_prompt(foscolo, client):
|
||||
# Condition whisper on starting text
|
||||
prompt = "Nor have I ever"
|
||||
transcription = await client.audio.translations.create(
|
||||
model=MODEL_NAME,
|
||||
file=foscolo,
|
||||
prompt=prompt,
|
||||
extra_body=dict(language="it"),
|
||||
response_format="text",
|
||||
temperature=0.0)
|
||||
out = json.loads(transcription)['text']
|
||||
assert "Nor will I ever touch the sacred" not in out
|
||||
assert prompt not in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_streaming_response(foscolo, client, server):
|
||||
translation = ""
|
||||
with RemoteOpenAIServer(model_name, server_args) as remote_server:
|
||||
client = remote_server.get_async_client()
|
||||
res_no_stream = await client.audio.translations.create(
|
||||
model=model_name,
|
||||
file=foscolo,
|
||||
response_format="json",
|
||||
extra_body=dict(language="it"),
|
||||
temperature=0.0)
|
||||
# Unfortunately this only works when the openai client is patched
|
||||
# to use streaming mode, not exposed in the translation api.
|
||||
original_post = AsyncAPIClient.post
|
||||
res_no_stream = await client.audio.translations.create(
|
||||
model=MODEL_NAME,
|
||||
file=foscolo,
|
||||
response_format="json",
|
||||
extra_body=dict(language="it"),
|
||||
temperature=0.0)
|
||||
# Stream via HTTPX since OpenAI translation client doesn't expose streaming
|
||||
url = server.url_for("v1/audio/translations")
|
||||
headers = {"Authorization": f"Bearer {server.DUMMY_API_KEY}"}
|
||||
data = {
|
||||
"model": MODEL_NAME,
|
||||
"language": "it",
|
||||
"stream": True,
|
||||
"temperature": 0.0,
|
||||
}
|
||||
foscolo.seek(0)
|
||||
async with httpx.AsyncClient() as http_client:
|
||||
files = {"file": foscolo}
|
||||
async with http_client.stream("POST",
|
||||
url,
|
||||
headers=headers,
|
||||
data=data,
|
||||
files=files) as response:
|
||||
async for line in response.aiter_lines():
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith("data: "):
|
||||
line = line[len("data: "):]
|
||||
if line.strip() == "[DONE]":
|
||||
break
|
||||
chunk = json.loads(line)
|
||||
text = chunk["choices"][0].get("delta", {}).get("content")
|
||||
translation += text or ""
|
||||
|
||||
async def post_with_stream(*args, **kwargs):
|
||||
kwargs['stream'] = True
|
||||
return await original_post(*args, **kwargs)
|
||||
|
||||
with patch.object(AsyncAPIClient, "post", new=post_with_stream):
|
||||
client = remote_server.get_async_client()
|
||||
res = await client.audio.translations.create(model=model_name,
|
||||
file=foscolo,
|
||||
temperature=0.0,
|
||||
extra_body=dict(
|
||||
stream=True,
|
||||
language="it"))
|
||||
# Reconstruct from chunks and validate
|
||||
async for chunk in res:
|
||||
# just a chunk
|
||||
text = chunk.choices[0]['delta']['content']
|
||||
translation += text
|
||||
|
||||
assert translation == res_no_stream.text
|
||||
assert translation == res_no_stream.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_options(foscolo):
|
||||
model_name = "openai/whisper-small"
|
||||
server_args = ["--enforce-eager"]
|
||||
with RemoteOpenAIServer(model_name, server_args) as remote_server:
|
||||
original_post = AsyncAPIClient.post
|
||||
|
||||
async def post_with_stream(*args, **kwargs):
|
||||
kwargs['stream'] = True
|
||||
return await original_post(*args, **kwargs)
|
||||
|
||||
with patch.object(AsyncAPIClient, "post", new=post_with_stream):
|
||||
client = remote_server.get_async_client()
|
||||
res = await client.audio.translations.create(
|
||||
model=model_name,
|
||||
file=foscolo,
|
||||
temperature=0.0,
|
||||
extra_body=dict(language="it",
|
||||
stream=True,
|
||||
stream_include_usage=True,
|
||||
stream_continuous_usage_stats=True))
|
||||
final = False
|
||||
continuous = True
|
||||
async for chunk in res:
|
||||
if not len(chunk.choices):
|
||||
async def test_stream_options(foscolo, client, server):
|
||||
url = server.url_for("v1/audio/translations")
|
||||
headers = {"Authorization": f"Bearer {server.DUMMY_API_KEY}"}
|
||||
data = {
|
||||
"model": MODEL_NAME,
|
||||
"language": "it",
|
||||
"stream": True,
|
||||
"stream_include_usage": True,
|
||||
"stream_continuous_usage_stats": True,
|
||||
"temperature": 0.0,
|
||||
}
|
||||
foscolo.seek(0)
|
||||
final = False
|
||||
continuous = True
|
||||
async with httpx.AsyncClient() as http_client:
|
||||
files = {"file": foscolo}
|
||||
async with http_client.stream("POST",
|
||||
url,
|
||||
headers=headers,
|
||||
data=data,
|
||||
files=files) as response:
|
||||
async for line in response.aiter_lines():
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith("data: "):
|
||||
line = line[len("data: "):]
|
||||
if line.strip() == "[DONE]":
|
||||
break
|
||||
chunk = json.loads(line)
|
||||
choices = chunk.get("choices", [])
|
||||
if not choices:
|
||||
# final usage sent
|
||||
final = True
|
||||
else:
|
||||
continuous = continuous and hasattr(chunk, 'usage')
|
||||
assert final and continuous
|
||||
continuous = continuous and ("usage" in chunk)
|
||||
assert final and continuous
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_long_audio_request(foscolo):
|
||||
model_name = "openai/whisper-small"
|
||||
server_args = ["--enforce-eager"]
|
||||
|
||||
async def test_long_audio_request(foscolo, client):
|
||||
foscolo.seek(0)
|
||||
audio, sr = librosa.load(foscolo)
|
||||
repeated_audio = np.tile(audio, 2)
|
||||
@@ -160,13 +172,11 @@ async def test_long_audio_request(foscolo):
|
||||
buffer = io.BytesIO()
|
||||
sf.write(buffer, repeated_audio, sr, format='WAV')
|
||||
buffer.seek(0)
|
||||
with RemoteOpenAIServer(model_name, server_args) as remote_server:
|
||||
client = remote_server.get_async_client()
|
||||
translation = await client.audio.translations.create(
|
||||
model=model_name,
|
||||
file=buffer,
|
||||
extra_body=dict(language="it"),
|
||||
response_format="text",
|
||||
temperature=0.0)
|
||||
out = json.loads(translation)['text'].strip().lower()
|
||||
assert out.count("greek sea") == 2
|
||||
translation = await client.audio.translations.create(
|
||||
model=MODEL_NAME,
|
||||
file=buffer,
|
||||
extra_body=dict(language="it"),
|
||||
response_format="text",
|
||||
temperature=0.0)
|
||||
out = json.loads(translation)['text'].strip().lower()
|
||||
assert out.count("greek sea") == 2
|
||||
|
||||
Reference in New Issue
Block a user