# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for the ``pooling`` endpoint of a server started with the
``token_embed`` pooler task."""

import pytest
import requests

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse

MODEL_NAME = "intfloat/multilingual-e5-small"
DTYPE = "bfloat16"

input_text = "The best thing about vLLM is that it supports many different models"
# Only the length of this list is used below, as the expected number of
# per-token embeddings for `input_text`.
# NOTE(review): presumably the token ids the model's tokenizer produces for
# `input_text` -- confirm against the tokenizer.
input_tokens = [
    0,
    581,
    2965,
    13580,
    1672,
    81,
    23708,
    594,
    83,
    450,
    442,
    8060,
    7,
    5941,
    12921,
    115774,
    2,
]


@pytest.fixture(scope="module")
def server():
    """Start one server per test module and yield its handle.

    The server is launched for ``MODEL_NAME`` with the pooling runner and the
    pooler task set to ``token_embed``; it is torn down when the
    ``RemoteOpenAIServer`` context manager exits.
    """
    args = [
        "--runner",
        "pooling",
        "--dtype",
        DTYPE,
        "--enforce-eager",
        "--max-model-len",
        "512",
        "--pooler-config.task",
        "token_embed",
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_embed(server: RemoteOpenAIServer, model_name: str):
    """A ``token_embed`` request returns one embedding vector per input token."""
    task = "token_embed"
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )
    # Fail with the HTTP error itself rather than an opaque validation error
    # when the server rejects the request.
    response.raise_for_status()
    poolings = PoolingResponse.model_validate(response.json())

    # One input string -> one result entry.
    assert len(poolings.data) == 1
    # One vector per token of the input ...
    assert len(poolings.data[0].data) == len(input_tokens)
    # ... each with 384 components.
    assert len(poolings.data[0].data[0]) == 384


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["embed", "classify", "token_classify", "plugin"])
async def test_pooling_not_supported(
    server: RemoteOpenAIServer, model_name: str, task: str
):
    """Requests for tasks other than the configured one yield a
    ``BadRequestError`` whose message starts with ``Unsupported task``."""
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": "test",
            "encoding_format": "float",
            "task": task,
        },
    )

    # NOTE(review): for "embed" the request is sent but nothing is asserted
    # about the response -- confirm whether that case should be checked too.
    if task != "embed":
        error = response.json()["error"]
        assert error["type"] == "BadRequestError"
        err_msg = f"Unsupported task: {task!r}"
        assert error["message"].startswith(err_msg)