[Frontend] Rerank API (Jina- and Cohere-compatible API) (#12376)

Signed-off-by: Kyle Mistele <kyle@mistele.com>
This commit is contained in:
Kyle Mistele
2025-01-26 20:58:45 -06:00
committed by GitHub
parent 72bac73067
commit 0034b09ceb
9 changed files with 552 additions and 11 deletions

View File

@@ -1018,6 +1018,52 @@ class ScoreRequest(OpenAIBaseModel):
return PoolingParams(additional_data=self.additional_data)
class RerankRequest(OpenAIBaseModel):
    """Request schema for the Rerank API.

    Carries a single query string together with the candidate documents,
    plus optional pooling and scheduling parameters.
    """

    model: str
    query: str
    documents: List[str]
    # NOTE(review): 0 presumably means "no limit / return every document" --
    # confirm against the serving handler that consumes this field.
    # A plain ``default`` is used instead of a ``default_factory`` lambda:
    # the value is an immutable constant, this matches ``priority`` below,
    # and a plain default is visible in the generated schema.
    top_n: int = Field(default=0)
    # When provided, must be an integer >= 1.
    truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None

    # doc: begin-rerank-pooling-params
    additional_data: Optional[Any] = None
    # doc: end-rerank-pooling-params

    # doc: begin-rerank-extra-params
    priority: int = Field(
        default=0,
        description=(
            "The priority of the request (lower means earlier handling; "
            "default: 0). Any priority other than 0 will raise an error "
            "if the served model does not use priority scheduling."))

    # doc: end-rerank-extra-params

    def to_pooling_params(self) -> PoolingParams:
        """Build the pooling parameters for this request.

        Returns:
            A ``PoolingParams`` constructed with only ``additional_data``
            forwarded from the request.
        """
        return PoolingParams(additional_data=self.additional_data)
class RerankDocument(BaseModel):
    """Document payload embedded in a single rerank result."""

    # Text of the document.
    text: str
class RerankResult(BaseModel):
    """One scored entry in a rerank response."""

    # NOTE(review): presumably the position of the document within the
    # request's ``documents`` list -- confirm against the result builder.
    index: int
    document: RerankDocument
    # Score attached to ``document``; its scale/range is not defined here.
    relevance_score: float
class RerankUsage(BaseModel):
    """Token accounting carried in a rerank response."""

    # Total token count reported for the request.
    total_tokens: int
class RerankResponse(OpenAIBaseModel):
    """Response schema for the Rerank API."""

    id: str
    model: str
    # Token usage summary for the request.
    usage: RerankUsage
    # Scored entries, one ``RerankResult`` each.
    results: List[RerankResult]
class CompletionLogProbs(OpenAIBaseModel):
text_offset: List[int] = Field(default_factory=list)
token_logprobs: List[Optional[float]] = Field(default_factory=list)