# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Integration tests for Whisper models with LoRA adapters.

These tests verify that Whisper models can correctly load and use LoRA adapters
for speech-to-text transcription tasks.
"""

import pytest

import vllm
from vllm.assets.audio import AudioAsset
from vllm.lora.request import LoRARequest

from ..utils import create_new_process_for_each_test

# Model configuration
WHISPER_MODEL = "openai/whisper-small"

# Decoder prompt for Whisper transcription
WHISPER_PROMPT = "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"
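# The prompt is Whisper's standard decoder prefix: <|startoftranscript|>
# begins decoding, <|en|> fixes the language, <|transcribe|> selects
# transcription (rather than translation), and <|notimestamps|> suppresses
# timestamp tokens.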

# Note: whisper_lora_files fixture is defined in conftest.py


@pytest.fixture(autouse=True)
def use_spawn_for_whisper(monkeypatch):
    """Whisper has issues with forked workers; use spawn instead."""
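    # Likely cause (an assumption, not verified here): forked workers can
    # inherit an already-initialized CUDA context, which cannot be safely
    # reused after fork(), so freshly spawned processes are the safe default.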
    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")


def create_whisper_llm(enable_lora: bool = True, max_loras: int = 2):
    """Create a Whisper LLM instance with optional LoRA support."""
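    # Notes on the settings below: max_model_len=448 matches Whisper's
    # decoder context length (max_target_positions), and max_lora_rank=64 is
    # an upper bound that must be >= the rank of the adapter under test.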
    return vllm.LLM(
        model=WHISPER_MODEL,
        enable_lora=enable_lora,
        max_loras=max_loras if enable_lora else 1,
        max_lora_rank=64,
        max_model_len=448,
        dtype="half",
        enforce_eager=True,  # For stability in tests
    )


def run_whisper_inference(
    llm: vllm.LLM,
    lora_path: str | None = None,
    lora_id: int = 1,
) -> list[str]:
    """Run Whisper inference with an optional LoRA adapter."""
    # Load test audio
    audio_asset = AudioAsset("mary_had_lamb")
    audio_data = audio_asset.audio_and_sample_rate
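    # audio_and_sample_rate returns a (waveform, sample_rate) tuple, the form
    # vLLM accepts for the "audio" multi-modal modality.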

    inputs = [
        {
            "prompt": WHISPER_PROMPT,
            "multi_modal_data": {"audio": audio_data},
        }
    ]

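    # temperature=0 means greedy decoding, so repeated runs with the same
    # weights should be deterministic (the multi-LoRA test relies on this).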
    sampling_params = vllm.SamplingParams(
        temperature=0,
        max_tokens=200,
    )

    # Prepare a LoRA request if an adapter path is provided
    lora_request = None
    if lora_path:
        lora_request = LoRARequest(
            lora_name=f"whisper_lora_{lora_id}",
            lora_int_id=lora_id,
            lora_path=lora_path,
        )

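    # lora_int_id must be a positive integer; vLLM uses it to identify the
    # adapter, and lora_request=None runs the unmodified base model.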
    outputs = llm.generate(inputs, sampling_params, lora_request=lora_request)

    return [output.outputs[0].text for output in outputs]


@create_new_process_for_each_test()
def test_whisper_lora_inference(whisper_lora_files):
    """Test basic Whisper inference with a LoRA adapter.

    This test verifies that:
    1. The Whisper model can be loaded with LoRA support enabled
    2. A LoRA adapter can be applied during inference
    3. The model produces valid transcription output
    """
    llm = create_whisper_llm(enable_lora=True)

    # Run inference with LoRA
    outputs = run_whisper_inference(llm, lora_path=whisper_lora_files, lora_id=1)

    # Verify we got a non-empty transcription
    assert len(outputs) == 1
    assert len(outputs[0]) > 0, "Expected non-empty transcription output"

    # The output should contain recognizable words from the audio clip
    # ("Mary had a little lamb")
    print(f"Transcription output: {outputs[0]}")
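    # A stricter variant could match words from the clip, e.g. (hypothetical,
    # since a fine-tuned adapter's output need not contain them):
    # assert "lamb" in outputs[0].lower()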


@create_new_process_for_each_test()
def test_whisper_multi_lora(whisper_lora_files):
    """Test Whisper with multiple LoRA adapter IDs.

    This test verifies that the same LoRA adapter can be loaded with
    different IDs and produce consistent results.
    """
    llm = create_whisper_llm(enable_lora=True, max_loras=4)

    # Test with different LoRA IDs using the same adapter
    outputs_lora1 = run_whisper_inference(llm, lora_path=whisper_lora_files, lora_id=1)
    outputs_lora2 = run_whisper_inference(llm, lora_path=whisper_lora_files, lora_id=2)

    # Both should produce valid outputs
    assert len(outputs_lora1[0]) > 0
    assert len(outputs_lora2[0]) > 0

    # The same adapter under different IDs should produce the same output
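    # (both IDs resolve to the same weights and decoding is greedy, so any
    # divergence would point to an adapter caching or mapping bug)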
    assert outputs_lora1 == outputs_lora2, (
        "Expected same outputs for same adapter with different IDs. "
        f"Got: {outputs_lora1} vs {outputs_lora2}"
    )


@create_new_process_for_each_test()
def test_whisper_with_and_without_lora(whisper_lora_files):
    """Test Whisper inference with and without a LoRA adapter.

    This test verifies that the LoRA and base-model paths both produce valid
    output on the same engine; it does not require the outputs to differ.
    """
    llm = create_whisper_llm(enable_lora=True)

    # Run with LoRA
    outputs_with_lora = run_whisper_inference(
        llm, lora_path=whisper_lora_files, lora_id=1
    )

    # Run without LoRA (base model only)
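    # lora_path=None makes the helper pass lora_request=None, i.e. plain
    # base-model inference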
    outputs_without_lora = run_whisper_inference(llm, lora_path=None)

    # Both should produce valid outputs
    assert len(outputs_with_lora[0]) > 0
    assert len(outputs_without_lora[0]) > 0

    print(f"Output with LoRA: {outputs_with_lora[0]}")
    print(f"Output without LoRA: {outputs_without_lora[0]}")

    # Note: Outputs may or may not differ depending on the adapter
    # The main verification is that both configurations work