[benchmark] add peak throughput metrics and plot (#23867)
Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
@@ -89,6 +89,7 @@ class RequestFuncOutput:
|
||||
tpot: float = 0.0 # avg next-token latencies
|
||||
prompt_len: int = 0
|
||||
error: str = ""
|
||||
start_time: float = 0.0
|
||||
|
||||
|
||||
async def async_request_openai_completions(
|
||||
@@ -140,6 +141,7 @@ async def async_request_openai_completions(
|
||||
|
||||
generated_text = ""
|
||||
st = time.perf_counter()
|
||||
output.start_time = st
|
||||
most_recent_timestamp = st
|
||||
try:
|
||||
async with session.post(url=api_url, json=payload,
|
||||
@@ -272,6 +274,7 @@ async def async_request_openai_chat_completions(
|
||||
generated_text = ""
|
||||
ttft = 0.0
|
||||
st = time.perf_counter()
|
||||
output.start_time = st
|
||||
most_recent_timestamp = st
|
||||
try:
|
||||
async with session.post(url=api_url, json=payload,
|
||||
@@ -396,6 +399,7 @@ async def async_request_openai_audio(
|
||||
generated_text = ""
|
||||
ttft = 0.0
|
||||
st = time.perf_counter()
|
||||
output.start_time = st
|
||||
most_recent_timestamp = st
|
||||
try:
|
||||
async with session.post(url=api_url,
|
||||
@@ -475,6 +479,7 @@ async def async_request_openai_embeddings(
|
||||
|
||||
output = RequestFuncOutput()
|
||||
st = time.perf_counter()
|
||||
output.start_time = st
|
||||
try:
|
||||
async with session.post(
|
||||
url=api_url,
|
||||
|
||||
Reference in New Issue
Block a user