[Frontend][Doc][5/N] Improve all pooling task | Polish encode (pooling) api & Document. (#25524)

Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
wang.yuqi
2025-10-30 20:13:05 +08:00
committed by GitHub
parent 74374386e2
commit 4464723f22
27 changed files with 499 additions and 131 deletions

View File

@@ -125,8 +125,8 @@ def test_invocations(server: RemoteOpenAIServer):
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_activation(server: RemoteOpenAIServer, model_name: str):
async def get_outputs(activation):
async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
async def get_outputs(use_activation):
query = "What is the capital of France?"
documents = [
"The capital of Brazil is Brasilia.",
@@ -139,16 +139,16 @@ async def test_activation(server: RemoteOpenAIServer, model_name: str):
"model": model_name,
"query": query,
"documents": documents,
"activation": activation,
"use_activation": use_activation,
},
)
outputs = response.json()
return torch.tensor([x["relevance_score"] for x in outputs["results"]])
default = await get_outputs(activation=None)
w_activation = await get_outputs(activation=True)
wo_activation = await get_outputs(activation=False)
default = await get_outputs(use_activation=None)
w_activation = await get_outputs(use_activation=True)
wo_activation = await get_outputs(use_activation=False)
assert torch.allclose(default, w_activation, atol=1e-2), (
"Default should use activation."
@@ -163,7 +163,25 @@ async def test_activation(server: RemoteOpenAIServer, model_name: str):
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling(server: RemoteOpenAIServer, model_name: str):
async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
input_text = "This product was excellent and exceeded my expectations"
response = requests.post(
server.url_for("pooling"),
json={
"model": model_name,
"input": input_text,
"encoding_format": "float",
"task": "classify",
},
)
poolings = PoolingResponse.model_validate(response.json())
assert len(poolings.data) == 1
assert len(poolings.data[0].data) == 1
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
input_text = ["The chef prepared a delicious meal."]
response = requests.post(
@@ -176,3 +194,24 @@ async def test_pooling(server: RemoteOpenAIServer, model_name: str):
assert len(poolings.data) == 1
assert len(poolings.data[0].data) == 11
assert len(poolings.data[0].data[0]) == 1
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
server: RemoteOpenAIServer, model_name: str, task: str
):
response = requests.post(
server.url_for("pooling"),
json={
"model": model_name,
"input": "test",
"encoding_format": "float",
"task": task,
},
)
assert response.json()["error"]["type"] == "BadRequestError"
assert response.json()["error"]["message"].startswith(
f"Task {task} is not supported"
)