# vllm/tests/entrypoints/pooling/embed/test_protocol.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for Cohere embed protocol: build_typed_embeddings and its
underlying packing helpers, plus Cohere-specific serving helpers."""

import struct

import numpy as np
import pybase64 as base64
import pytest

from vllm.entrypoints.pooling.embed.protocol import (
    build_typed_embeddings,
)


@pytest.fixture
def sample_embeddings() -> list[list[float]]:
    """Provide two 8-element float vectors whose values alternate in sign.

    Both rows have a length that is a multiple of 8, so the same fixture is
    usable by the bit-packing tests as well as the float and base64 ones.
    """
    starts_positive = [0.1, -0.2, 0.3, -0.4, 0.5, -0.6, 0.7, -0.8]
    starts_negative = [-0.05, 0.15, -0.25, 0.35, -0.45, 0.55, -0.65, 0.75]
    return [starts_positive, starts_negative]


class TestBuildTypedEmbeddingsFloat:
    """Checks for the ``"float"`` output of ``build_typed_embeddings``."""

    def test_float_passthrough(self, sample_embeddings: list[list[float]]):
        """Requesting only ``"float"`` echoes the input; ``binary`` stays unset."""
        requested = ["float"]
        typed = build_typed_embeddings(sample_embeddings, requested)
        assert typed.float == sample_embeddings
        assert typed.binary is None

    def test_empty_input(self):
        """An empty batch yields an empty ``float`` list."""
        no_vectors: list[list[float]] = []
        typed = build_typed_embeddings(no_vectors, ["float"])
        assert typed.float == []


class TestBuildTypedEmbeddingsBinary:
    """Bit-packing checks for the ``"binary"`` and ``"ubinary"`` output types.

    The expectations below encode the packing rule under test: every 8 input
    values become one byte, a value >= 0 maps to bit 1 and a negative value to
    bit 0, with the first value in the most significant bit. ``"ubinary"``
    reports the byte as-is and ``"binary"`` reports it offset by -128.
    """

    def test_binary_packing(self):
        """Alternating signs pack to 0b10101010, reported signed as 42."""
        # 8 values: positive->1, negative->0 => bits: 10101010 = 0xAA = 170
        # signed: 170 - 128 = 42
        embs = [[1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0]]
        result = build_typed_embeddings(embs, ["binary"])
        assert result.binary is not None
        assert result.binary[0] == [42]

    def test_ubinary_packing(self):
        """Alternating signs pack to 0b10101010, reported unsigned as 170."""
        embs = [[1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0]]
        result = build_typed_embeddings(embs, ["ubinary"])
        assert result.ubinary is not None
        assert result.ubinary[0] == [170]  # 0b10101010

    def test_binary_all_positive(self):
        """Eight positive values give the maximum signed byte."""
        embs = [[0.1] * 8]
        result = build_typed_embeddings(embs, ["binary"])
        assert result.binary is not None
        # all bits = 1 => 0xFF = 255, signed: 255 - 128 = 127
        assert result.binary[0] == [127]

    def test_binary_all_negative(self):
        """Eight negative values give the minimum signed byte."""
        embs = [[-0.1] * 8]
        result = build_typed_embeddings(embs, ["binary"])
        assert result.binary is not None
        # all bits = 0, signed: 0 - 128 = -128
        assert result.binary[0] == [-128]

    def test_binary_dimension_is_eighth(self, sample_embeddings: list[list[float]]):
        """Each packed row has one byte per 8 input values, for every input row."""
        result = build_typed_embeddings(sample_embeddings, ["binary"])
        assert result.binary is not None
        # zip() stops at the shorter input, so pin the row count first;
        # otherwise a truncated (or empty) result would pass the loop vacuously.
        assert len(result.binary) == len(sample_embeddings)
        for orig, packed in zip(sample_embeddings, result.binary):
            assert len(packed) == len(orig) // 8

    def test_zero_treated_as_positive(self):
        """Exact zeros land on the positive side of the sign threshold."""
        embs = [[0.0] * 8]
        result = build_typed_embeddings(embs, ["binary"])
        assert result.binary is not None
        # 0.0 >= 0 is True, so bit=1 for all => 127 (signed)
        assert result.binary[0] == [127]

    def test_non_multiple_of_8_raises(self):
        """A 7-element vector is rejected for ``"binary"`` with a ValueError."""
        embs = [[0.1] * 7]
        with pytest.raises(ValueError, match="multiple of 8"):
            build_typed_embeddings(embs, ["binary"])

    def test_ubinary_non_multiple_of_8_raises(self):
        """A 10-element vector is rejected for ``"ubinary"`` with a ValueError."""
        embs = [[0.1] * 10]
        with pytest.raises(ValueError, match="multiple of 8"):
            build_typed_embeddings(embs, ["ubinary"])


class TestBuildTypedEmbeddingsBase64:
    """Checks for the ``"base64"`` output of ``build_typed_embeddings``."""

    def test_base64_roundtrip(self, sample_embeddings: list[list[float]]):
        """Decoding each string as little-endian float32 recovers the input."""
        encoded_rows = build_typed_embeddings(sample_embeddings, ["base64"]).base64
        assert encoded_rows is not None
        assert len(encoded_rows) == 2

        for expected, payload in zip(sample_embeddings, encoded_rows):
            raw_bytes = base64.b64decode(payload)
            # One 4-byte little-endian float per original component.
            layout = f"<{len(expected)}f"
            unpacked = struct.unpack(layout, raw_bytes)
            np.testing.assert_allclose(expected, unpacked, rtol=1e-5)

    def test_base64_byte_length(self):
        """A 3-element vector encodes to exactly 12 raw bytes."""
        vector = [0.1, 0.2, 0.3]
        typed = build_typed_embeddings([vector], ["base64"])
        assert typed.base64 is not None
        payload = base64.b64decode(typed.base64[0])
        assert len(payload) == 3 * 4  # 3 floats * 4 bytes each


class TestBuildTypedEmbeddingsMultiple:
    """Requests that name several embedding types in a single call."""

    def test_all_types_at_once(self, sample_embeddings: list[list[float]]):
        """Asking for all four types populates all four result fields."""
        every_type = ["float", "binary", "ubinary", "base64"]
        typed = build_typed_embeddings(sample_embeddings, every_type)
        assert typed.float is not None
        assert typed.binary is not None
        assert typed.ubinary is not None
        assert typed.base64 is not None

    def test_subset_types(self, sample_embeddings: list[list[float]]):
        """Only the requested fields are filled; the others remain ``None``."""
        wanted = ["float", "binary"]
        typed = build_typed_embeddings(sample_embeddings, wanted)
        assert typed.float is not None
        assert typed.binary is not None
        assert typed.ubinary is None
        assert typed.base64 is None

    def test_unknown_type_ignored(self, sample_embeddings: list[list[float]]):
        """An unrecognized type name does not stop ``float`` from being built."""
        wanted = ["float", "unknown_type"]
        typed = build_typed_embeddings(sample_embeddings, wanted)
        assert typed.float is not None