[Doc] Reorganize online pooling APIs (#11172)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-12-14 00:22:22 +08:00
committed by GitHub
parent 238c0d93b4
commit 0920ab9131
10 changed files with 431 additions and 351 deletions

View File

@@ -406,7 +406,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
assert_never(generator)
@router.post("/v1/score")
@router.post("/score")
async def create_score(request: ScoreRequest, raw_request: Request):
handler = score(raw_request)
if handler is None:
@@ -423,6 +423,15 @@ async def create_score(request: ScoreRequest, raw_request: Request):
assert_never(generator)
@router.post("/v1/score")
async def create_score_v1(request: ScoreRequest, raw_request: Request):
    """Handle `/v1/score` by forwarding the request to `create_score`.

    A warning is logged on each call telling clients that the Score API
    now lives at `/score`; the request is then passed through unchanged
    and the result of `create_score` is returned as-is.
    """
    relocation_notice = (
        "To indicate that Score API is not part of standard OpenAI API, we "
        "have moved it to `/score`. Please update your client accordingly.")
    logger.warning(relocation_notice)

    forwarded_response = await create_score(request, raw_request)
    return forwarded_response
if envs.VLLM_TORCH_PROFILER_DIR:
logger.warning(
"Torch Profiler is enabled in the API server. This should ONLY be "

View File

@@ -812,10 +812,11 @@ class ScoreRequest(OpenAIBaseModel):
text_2: Union[List[str], str]
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
# doc: begin-chat-embedding-pooling-params
# doc: begin-score-pooling-params
additional_data: Optional[Any] = None
# doc: end-chat-embedding-pooling-params
# doc: end-score-pooling-params
# doc: begin-score-extra-params
priority: int = Field(
default=0,
description=(
@@ -823,6 +824,8 @@ class ScoreRequest(OpenAIBaseModel):
"default: 0). Any priority other than 0 will raise an error "
"if the served model does not use priority scheduling."))
# doc: end-score-extra-params
def to_pooling_params(self):
    """Return a PoolingParams built from this request's `additional_data`."""
    extra_payload = self.additional_data
    return PoolingParams(additional_data=extra_payload)