[Deprecate] Deprecate pooling multi task support. (#37956)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Authored by wang.yuqi on 2026-03-24 22:07:47 +08:00, committed by GitHub
parent 352b90c4a4
commit 1b6cb920e6
18 changed files with 566 additions and 66 deletions
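
In short: a pooling engine is now built for exactly one pooling task, and each `llm.encode` call must name that same task. Below is a minimal sketch of the post-deprecation flow, using only the model, config, and calls that appear in the tests in this diff; the shape comment mirrors the test assertions.

    from vllm import LLM
    from vllm.config import PoolerConfig

    # One engine, one pooling task (multi-task pooling is being deprecated).
    llm = LLM(
        model="jason9693/Qwen2.5-1.5B-apeach",
        pooler_config=PoolerConfig(task="token_classify"),
        enforce_eager=True,
    )

    # pooling_task must match the configured task; per the tests below,
    # "classify" still runs but logs a deprecation warning, while the
    # embedding tasks raise ValueError.
    outputs = llm.encode("The chef prepared a delicious meal.",
                         pooling_task="token_classify")
    print(outputs[0].outputs.data.shape)  # (num_prompt_tokens, num_labels)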


@@ -0,0 +1,78 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import logging
import weakref

import pytest

from vllm import LLM, PoolingRequestOutput
from vllm.config import PoolerConfig
from vllm.distributed import cleanup_dist_env_and_memory
from vllm.tasks import PoolingTask

MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"

prompt = "The chef prepared a delicious meal."
prompt_token_ids = [785, 29706, 10030, 264, 17923, 15145, 13]
num_labels = 2


@pytest.fixture(scope="module")
def llm():
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(
        model=MODEL_NAME,
        pooler_config=PoolerConfig(task="token_classify"),
        max_num_batched_tokens=32768,
        tensor_parallel_size=1,
        gpu_memory_utilization=0.75,
        enforce_eager=True,
        seed=0,
    )

    yield weakref.proxy(llm)

    del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_str_prompts(llm: LLM):
    outputs = llm.encode(prompt, pooling_task="token_classify", use_tqdm=False)

    assert len(outputs) == 1
    assert isinstance(outputs[0], PoolingRequestOutput)
    assert outputs[0].prompt_token_ids == prompt_token_ids
    assert outputs[0].outputs.data.shape == (len(prompt_token_ids), num_labels)


@pytest.mark.skip_global_cleanup
def test_token_ids_prompts(llm: LLM):
    outputs = llm.encode(
        [prompt_token_ids], pooling_task="token_classify", use_tqdm=False
    )

    assert len(outputs) == 1
    assert isinstance(outputs[0], PoolingRequestOutput)
    assert outputs[0].prompt_token_ids == prompt_token_ids
    assert outputs[0].outputs.data.shape == (len(prompt_token_ids), num_labels)


@pytest.mark.skip_global_cleanup
def test_score_api(llm: LLM):
    # The score API requires a single-label classifier; this model has two labels.
    err_msg = "Score API is only enabled for num_labels == 1."
    with pytest.raises(ValueError, match=err_msg):
        llm.score("ping", "pong", use_tqdm=False)


@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("task", ["classify", "embed", "token_embed"])
def test_unsupported_tasks(llm: LLM, task: PoolingTask, caplog_vllm):
    if task == "classify":
        # "classify" still runs on a token_classify pooler but logs a
        # deprecation warning; the other tasks are rejected outright.
        with caplog_vllm.at_level(level=logging.WARNING, logger="vllm"):
            llm.encode(prompt, pooling_task=task, use_tqdm=False)

        assert "deprecated" in caplog_vllm.text
    else:
        err_msg = "Embedding API is not supported by this model.+"
        with pytest.raises(ValueError, match=err_msg):
            llm.encode(prompt, pooling_task=task, use_tqdm=False)
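
For context, the shape assertions above compare `outputs.data.shape` against a tuple, consistent with `PoolingOutput.data` being a tensor. A sketch of how a caller might post-process the token_classify output; the softmax/argmax step is illustrative and not part of this commit:

    import torch

    output = llm.encode(prompt, pooling_task="token_classify", use_tqdm=False)[0]
    logits = output.outputs.data            # shape: (num_tokens, num_labels)
    probs = torch.softmax(logits, dim=-1)   # per-token label probabilities
    labels = probs.argmax(dim=-1).tolist()  # one predicted label id per token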


@@ -0,0 +1,70 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import requests

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse

MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
DTYPE = "float32"  # Use float32 to avoid NaN issue

input_text = "This product was excellent and exceeded my expectations"
input_tokens = [1986, 1985, 572, 9073, 323, 33808, 847, 16665]


@pytest.fixture(scope="module")
def server():
    args = [
        "--enforce-eager",
        "--max-model-len",
        "512",
        "--dtype",
        DTYPE,
        "--pooler-config.task",
        "token_classify",
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
    task = "token_classify"
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )

    poolings = PoolingResponse.model_validate(response.json())

    # One output per prompt; 8 rows (one per input token), 2 columns (labels).
    assert len(poolings.data) == 1
    assert len(poolings.data[0].data) == 8
    assert len(poolings.data[0].data[0]) == 2


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["classify", "embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
    server: RemoteOpenAIServer, model_name: str, task: str
):
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )

    if task != "classify":
        # "classify" is still accepted (deprecated); every other mismatched
        # task is rejected with a BadRequestError.
        assert response.json()["error"]["type"] == "BadRequestError"

        err_msg = f"Unsupported task: {task!r}"
        assert response.json()["error"]["message"].startswith(err_msg)
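
The same request can be issued against a manually started server. A sketch mirroring the test payload above; the base URL is an assumption (RemoteOpenAIServer binds a free port, so adjust host and port to your deployment):

    import requests

    # Assumes a server launched with: --pooler-config.task token_classify
    # (the fixture args above) and listening on localhost:8000.
    response = requests.post(
        "http://localhost:8000/pooling",
        json={
            "model": "jason9693/Qwen2.5-1.5B-apeach",
            "input": "This product was excellent and exceeded my expectations",
            "encoding_format": "float",
            "task": "token_classify",
        },
    )
    rows = response.json()["data"][0]["data"]  # num_tokens x num_labels nested lists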