[Frontend][Doc][5/N] Improve all pooling task | Polish encode (pooling) api & Document. (#25524)

Signed-off-by: wang.yuqi <noooop@126.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
2025-10-30 20:13:05 +08:00
parent 74374386e2
commit 4464723f22
27 changed files with 499 additions and 131 deletions
--- a/tests/entrypoints/pooling/openai/test_rerank.py
+++ b/tests/entrypoints/pooling/openai/test_rerank.py
@@ -125,8 +125,8 @@ def test_invocations(server: RemoteOpenAIServer):

@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_activation(server: RemoteOpenAIServer, model_name: str):
-    async def get_outputs(activation):
+async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
+    async def get_outputs(use_activation):
        query = "What is the capital of France?"
        documents = [
            "The capital of Brazil is Brasilia.",
@@ -139,16 +139,16 @@ async def test_activation(server: RemoteOpenAIServer, model_name: str):
                "model": model_name,
                "query": query,
                "documents": documents,
-                "activation": activation,
+                "use_activation": use_activation,
            },
        )
        outputs = response.json()

        return torch.tensor([x["relevance_score"] for x in outputs["results"]])

-    default = await get_outputs(activation=None)
-    w_activation = await get_outputs(activation=True)
-    wo_activation = await get_outputs(activation=False)
+    default = await get_outputs(use_activation=None)
+    w_activation = await get_outputs(use_activation=True)
+    wo_activation = await get_outputs(use_activation=False)

    assert torch.allclose(default, w_activation, atol=1e-2), (
        "Default should use activation."
@@ -163,7 +163,25 @@ async def test_activation(server: RemoteOpenAIServer, model_name: str):

@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_pooling(server: RemoteOpenAIServer, model_name: str):
+async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
+    input_text = "This product was excellent and exceeded my expectations"
+    response = requests.post(
+        server.url_for("pooling"),
+        json={
+            "model": model_name,
+            "input": input_text,
+            "encoding_format": "float",
+            "task": "classify",
+        },
+    )
+    poolings = PoolingResponse.model_validate(response.json())
+    assert len(poolings.data) == 1
+    assert len(poolings.data[0].data) == 1
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
    input_text = ["The chef prepared a delicious meal."]

    response = requests.post(
@@ -176,3 +194,24 @@ async def test_pooling(server: RemoteOpenAIServer, model_name: str):
    assert len(poolings.data) == 1
    assert len(poolings.data[0].data) == 11
    assert len(poolings.data[0].data[0]) == 1
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
+async def test_pooling_not_supported(
+    server: RemoteOpenAIServer, model_name: str, task: str
+):
+    response = requests.post(
+        server.url_for("pooling"),
+        json={
+            "model": model_name,
+            "input": "test",
+            "encoding_format": "float",
+            "task": task,
+        },
+    )
+    assert response.json()["error"]["type"] == "BadRequestError"
+    assert response.json()["error"]["message"].startswith(
+        f"Task {task} is not supported"
+    )