[Frontend] Add per-request number of cached token stats (#10174)

This commit is contained in:
zifeitong
2024-11-12 08:42:28 -08:00
committed by GitHub
parent 176fcb1c71
commit 47db6ec831
9 changed files with 89 additions and 23 deletions

View File

@@ -99,10 +99,15 @@ class ModelList(OpenAIBaseModel):
data: List[ModelCard] = Field(default_factory=list)
class PromptTokenUsageInfo(OpenAIBaseModel):
    """Per-request breakdown of prompt-token usage.

    Mirrors the OpenAI API's ``prompt_tokens_details`` object, nested
    under ``UsageInfo.prompt_tokens_details`` in responses.
    """

    # Number of prompt tokens reported as cached for this request.
    # NOTE(review): presumably counts prefix-cache hits (per the commit
    # title, "per-request number of cached token stats") — confirm against
    # the engine-side stats producer. None when no count was reported.
    cached_tokens: Optional[int] = None
class UsageInfo(OpenAIBaseModel):
    """Token-usage accounting attached to an OpenAI-compatible response.

    Mirrors the OpenAI API's ``usage`` object.
    """

    # Tokens consumed by the prompt.
    prompt_tokens: int = 0
    # prompt_tokens + completion_tokens.
    total_tokens: int = 0
    # Tokens generated in the completion; Optional because some
    # endpoints (e.g. embeddings) produce no completion tokens.
    completion_tokens: Optional[int] = 0
    # Optional per-prompt detail (e.g. cached-token count); omitted
    # when detail accounting is unavailable.
    prompt_tokens_details: Optional[PromptTokenUsageInfo] = None
class RequestResponseMetadata(BaseModel):