[Misc][Tools][Benchmark] Add benchmark_serving supports for llama.cpp. (#18692)

Signed-off-by: Duyi-Wang <duyi.wang@intel.com>
This commit is contained in:
Duyi-Wang
2025-05-29 20:02:08 +08:00
committed by GitHub
parent f8977c233f
commit b169d5f7b6
2 changed files with 6 additions and 1 deletions

View File

@@ -324,7 +324,7 @@ async def async_request_openai_completions(
most_recent_timestamp = timestamp
generated_text += text or ""
-elif usage := data.get("usage"):
+if usage := data.get("usage"):
output.output_tokens = usage.get("completion_tokens")
if first_chunk_received:
output.success = True
@@ -611,6 +611,7 @@ ASYNC_REQUEST_FUNCS = {
"tensorrt-llm": async_request_trt_llm,
"scalellm": async_request_openai_completions,
"sglang": async_request_openai_completions,
+"llama.cpp": async_request_openai_completions,
}
OPENAI_COMPATIBLE_BACKENDS = [