[benchmark] add peak throughput metrics and plot (#23867)

Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
Simon Mo
2025-09-17 22:30:02 -07:00
committed by GitHub
parent b7433ca1a4
commit a904ea78ea
2 changed files with 134 additions and 69 deletions

View File

@@ -89,6 +89,7 @@ class RequestFuncOutput:
tpot: float = 0.0 # avg next-token latencies
prompt_len: int = 0
error: str = ""
start_time: float = 0.0
async def async_request_openai_completions(
@@ -140,6 +141,7 @@ async def async_request_openai_completions(
generated_text = ""
st = time.perf_counter()
output.start_time = st
most_recent_timestamp = st
try:
async with session.post(url=api_url, json=payload,
@@ -272,6 +274,7 @@ async def async_request_openai_chat_completions(
generated_text = ""
ttft = 0.0
st = time.perf_counter()
output.start_time = st
most_recent_timestamp = st
try:
async with session.post(url=api_url, json=payload,
@@ -396,6 +399,7 @@ async def async_request_openai_audio(
generated_text = ""
ttft = 0.0
st = time.perf_counter()
output.start_time = st
most_recent_timestamp = st
try:
async with session.post(url=api_url,
@@ -475,6 +479,7 @@ async def async_request_openai_embeddings(
output = RequestFuncOutput()
st = time.perf_counter()
output.start_time = st
try:
async with session.post(
url=api_url,