[Frontend][1/n] Make pooling entrypoints request schema consensus | CompletionRequest (#32395)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Author: wang.yuqi
Date: 2026-01-16 14:17:04 +08:00
Committed by: GitHub
Parent: 73f635a75f
Commit: 4ae77dfd42
22 changed files with 635 additions and 600 deletions


@@ -13,6 +13,8 @@ from vllm.platforms import current_platform
 MODEL_NAME = "BAAI/bge-reranker-base"
 DTYPE = "bfloat16"
+input_text = "This product was excellent and exceeded my expectations"
+input_tokens = [0, 3293, 12996, 509, 40881, 136, 204839, 297, 759, 202702, 2]
 @pytest.fixture(scope="module")
@@ -27,6 +29,21 @@ def server():
         yield remote_server
 
 
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+def test_basic(server: RemoteOpenAIServer, model_name: str):
+    # test /v1/models
+    response = requests.get(server.url_for("/v1/models"))
+    served_model = response.json()["data"][0]["id"]
+    assert served_model == MODEL_NAME
+
+    # test /tokenize
+    response = requests.post(
+        server.url_for("/tokenize"),
+        json={"model": model_name, "prompt": input_text},
+    )
+    assert response.json()["tokens"] == input_tokens
+
+
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 def test_rerank_texts(server: RemoteOpenAIServer, model_name: str):
     query = "What is the capital of France?"
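
The new test_basic above exercises /v1/models and /tokenize end to end. For reference, a minimal standalone sketch of the same two calls, assuming a vLLM OpenAI-compatible server is already running (the base URL is hypothetical; the tests obtain theirs from the RemoteOpenAIServer fixture via server.url_for):

import requests

BASE = "http://localhost:8000"  # hypothetical; the tests use server.url_for(...)

# /v1/models reports the served model id
resp = requests.get(f"{BASE}/v1/models")
assert resp.json()["data"][0]["id"] == "BAAI/bge-reranker-base"

# /tokenize returns the token ids for the shared input_text
resp = requests.post(
    f"{BASE}/tokenize",
    json={
        "model": "BAAI/bge-reranker-base",
        "prompt": "This product was excellent and exceeded my expectations",
    },
)
# expected to equal the module-level input_tokens list above
print(resp.json()["tokens"])
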
@@ -170,7 +187,6 @@ async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
-    input_text = "This product was excellent and exceeded my expectations"
     response = requests.post(
         server.url_for("pooling"),
         json={
@@ -188,8 +204,6 @@ async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
-    input_text = ["The chef prepared a delicious meal."]
-
     response = requests.post(
         server.url_for("pooling"),
         json={"model": model_name, "input": input_text, "encoding_format": "float"},
@@ -198,7 +212,7 @@ async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: st
     poolings = PoolingResponse.model_validate(response.json())
 
     assert len(poolings.data) == 1
-    assert len(poolings.data[0].data) == 11
+    assert len(poolings.data[0].data) == len(input_tokens)
     assert len(poolings.data[0].data[0]) == 1
@@ -212,7 +226,7 @@ async def test_pooling_not_supported(
         server.url_for("pooling"),
         json={
             "model": model_name,
-            "input": "test",
+            "input": input_text,
             "encoding_format": "float",
             "task": task,
         },
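
Across these hunks the pooling tests now share one module-level input, so the token-level assertion can be written as len(input_tokens) instead of a hard-coded 11. A sketch of the /pooling request shape they exercise, under the same hypothetical localhost assumption (field access mirrors the PoolingResponse assertions above):

import requests

BASE = "http://localhost:8000"  # hypothetical; the tests use server.url_for("pooling")

resp = requests.post(
    f"{BASE}/pooling",
    json={
        "model": "BAAI/bge-reranker-base",
        "input": "This product was excellent and exceeded my expectations",
        "encoding_format": "float",
    },
)
data = resp.json()["data"]
# Token-level pooling yields one row per input token, which is what the
# len(input_tokens) assertion encodes (11 tokens for this input).
print(len(data[0]["data"]))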


@@ -7,7 +7,7 @@ import pytest
 
 from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import ChatTemplateResolutionError
-from vllm.entrypoints.score_utils import get_score_prompt
+from vllm.entrypoints.pooling.score.utils import get_score_prompt
 from vllm.inputs import TokensPrompt
 from vllm.tokenizers import get_tokenizer
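
This hunk is the mechanical half of the reorganization: the score helpers move from the flat vllm.entrypoints.score_utils module into the new pooling package. Downstream code needs the same one-line change (only the two paths shown in this diff are confirmed):

# before this PR
# from vllm.entrypoints.score_utils import get_score_prompt

# after this PR
from vllm.entrypoints.pooling.score.utils import get_score_prompt
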
@@ -212,7 +212,7 @@ class TestGetScorePrompt:
                 return_value=mock_model_no_score_template,
             ),
             patch(
-                "vllm.entrypoints.score_utils.apply_hf_chat_template",
+                "vllm.entrypoints.pooling.score.utils.apply_hf_chat_template",
                 return_value="test querytest doc",
             ),
         ):
@@ -245,7 +245,7 @@ class TestGetScorePrompt:
                 return_value=mock_model_no_score_template,
             ),
             patch(
-                "vllm.entrypoints.score_utils.apply_hf_chat_template",
+                "vllm.entrypoints.pooling.score.utils.apply_hf_chat_template",
                 side_effect=ChatTemplateResolutionError("No template"),
             ),
         ):
@@ -296,7 +296,7 @@ class TestGetScorePrompt:
                 return_value=mock_model_no_score_template,
             ),
             patch(
-                "vllm.entrypoints.score_utils.apply_hf_chat_template",
+                "vllm.entrypoints.pooling.score.utils.apply_hf_chat_template",
                 side_effect=ChatTemplateResolutionError("No template"),
             ),
         ):
@@ -331,7 +331,7 @@ class TestGetScorePrompt:
                 return_value=mock_model_with_score_template,
            ),
             patch(
-                "vllm.entrypoints.score_utils.apply_hf_chat_template",
+                "vllm.entrypoints.pooling.score.utils.apply_hf_chat_template",
                 side_effect=ChatTemplateResolutionError("No template"),
             ),
         ):
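
The four remaining hunks all make the same fix: unittest.mock.patch replaces a name where it is looked up, so the target string must track the module that now imports apply_hf_chat_template. A minimal sketch of the updated pattern (the return value is the placeholder string used by the tests above):

from unittest.mock import patch

# patch() takes the lookup path, so after the module move the target must
# name vllm.entrypoints.pooling.score.utils, not the old score_utils module.
with patch(
    "vllm.entrypoints.pooling.score.utils.apply_hf_chat_template",
    return_value="test querytest doc",
):
    ...  # call get_score_prompt and assert against the mocked template output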