Benchmark serving structured output (#10880)

Signed-off-by: Chendi Xue <chendi.xue@intel.com>
Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
Chendi.Xue
2024-12-04 15:28:21 -06:00
committed by GitHub
parent 10398b4706
commit 82eb5ea8f3
2 changed files with 887 additions and 0 deletions

View File

@@ -24,6 +24,7 @@ class RequestFuncInput:
model: str
best_of: int = 1
logprobs: Optional[int] = None
extra_body: Optional[dict] = None
multi_modal_content: Optional[dict] = None
ignore_eos: bool = False
@@ -36,6 +37,7 @@ class RequestFuncOutput:
ttft: float = 0.0 # Time to first token
itl: List[float] = field(
default_factory=list) # List of inter-token latencies
tpot: float = 0.0 # avg next-token latencies
prompt_len: int = 0
error: str = ""
@@ -242,6 +244,8 @@ async def async_request_openai_completions(
"stream": True,
"ignore_eos": request_func_input.ignore_eos,
}
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
}
@@ -336,6 +340,8 @@ async def async_request_openai_chat_completions(
"stream": True,
"ignore_eos": request_func_input.ignore_eos,
}
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",