[Frontend] Add per-request number of cached token stats (#10174)
This commit is contained in:
@@ -99,10 +99,15 @@ class ModelList(OpenAIBaseModel):
|
||||
data: List[ModelCard] = Field(default_factory=list)
|
||||
|
||||
|
||||
class PromptTokenUsageInfo(OpenAIBaseModel):
    """Per-request breakdown of prompt-token usage.

    Mirrors the OpenAI API's ``prompt_tokens_details`` object.
    """

    # Count of prompt tokens reported as cached — presumably tokens served
    # from a prompt/prefix cache rather than recomputed; None when the
    # serving engine provides no cache statistics. TODO(review): confirm
    # semantics against the engine's stats source.
    cached_tokens: Optional[int] = None
|
||||
|
||||
|
||||
class UsageInfo(OpenAIBaseModel):
    """Aggregate token-usage statistics attached to an API response.

    Field names and layout follow the OpenAI API's ``usage`` object.
    """

    # Tokens consumed by the input prompt.
    prompt_tokens: int = 0
    # Total tokens for the request (prompt plus completion,
    # per the OpenAI usage convention — not computed here).
    total_tokens: int = 0
    # Tokens generated by the model; Optional, defaults to 0.
    completion_tokens: Optional[int] = 0
    # Optional per-request prompt-token details
    # (e.g. cached-token counts); absent when not reported.
    prompt_tokens_details: Optional[PromptTokenUsageInfo] = None
|
||||
|
||||
|
||||
class RequestResponseMetadata(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user