[ASR] Fix audio benchmark and add RTFx metric (#32300)

Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
Co-authored-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com>
2026-02-09 05:02:37 -05:00
parent 3025b3cebb
commit 1d5922fade
4 changed files with 90 additions and 19 deletions
--- a/vllm/benchmarks/lib/endpoint_request_func.py
+++ b/vllm/benchmarks/lib/endpoint_request_func.py
@@ -93,6 +93,7 @@ class RequestFuncOutput:
    prompt_len: int = 0
    error: str = ""
    start_time: float = 0.0
+    input_audio_duration: float = 0.0  # in seconds


 class RequestFunc(Protocol):
@@ -422,6 +423,8 @@ async def async_request_openai_audio(

        output = RequestFuncOutput()
        output.prompt_len = request_func_input.prompt_len
+        output.input_audio_duration = soundfile.info(f).duration
+        f.seek(0)

        generated_text = ""
        ttft = 0.0
@@ -442,7 +445,9 @@ async def async_request_openai_audio(

                        messages = handler.add_chunk(chunk_bytes)
                        for message in messages:
-                            chunk = message.decode("utf-8").removeprefix("data: ")
+                            if type(message) is bytes:
+                                message = message.decode("utf-8")
+                            chunk = message.removeprefix("data: ")
                            if chunk != "[DONE]":
                                timestamp = time.perf_counter()
                                data = json.loads(chunk)