[Frontend] Add per-request number of cached token stats (#10174)

This commit is contained in:
zifeitong
2024-11-12 08:42:28 -08:00
committed by GitHub
parent 176fcb1c71
commit 47db6ec831
9 changed files with 89 additions and 23 deletions

View File

@@ -540,6 +540,7 @@ def init_app_state(
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
enable_auto_tools=args.enable_auto_tool_choice,
tool_parser=args.tool_call_parser,
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
) if model_config.task == "generate" else None
state.openai_serving_completion = OpenAIServingCompletion(
engine_client,