Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
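
The change replaces two tools with one: ruff's formatter takes over from yapf, and ruff's isort-compatible lint rules (the "I" family) take over from isort. The exact configuration landed by this PR is not part of this excerpt; as a rough sketch, the pyproject.toml change has roughly this shape (rule selection and values here are illustrative assumptions, though the 88-column wraps visible in the hunks below are consistent with the default line length):

```toml
# Illustrative sketch only; the actual settings from #26247 are not in this diff.
[tool.ruff]
line-length = 88  # black-style default; matches the wrap points seen below

[tool.ruff.lint]
select = ["E", "F", "I"]  # "I" = isort-compatible import-sorting rules

[tool.ruff.format]
quote-style = "double"  # matches the 'embedding' -> "embedding" change below
```

With a setup like this, `ruff check --fix` handles import sorting and `ruff format` handles layout, which produces the mechanical changes in the hunks that follow: yapf's paren-aligned continuations collapse into 4-space hanging indents, multi-item literals explode one-per-line and gain trailing commas, and single quotes become double quotes.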
@@ -11,8 +11,7 @@ import requests
 import torch
 import torch.nn.functional as F
 
-from tests.models.language.pooling.embed_utils import (
-    run_embedding_correctness_test)
+from tests.models.language.pooling.embed_utils import run_embedding_correctness_test
 from tests.models.utils import check_embeddings_close
 from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import EmbeddingResponse
@@ -50,15 +49,13 @@ async def client(server):
 
 @pytest.fixture(scope="module")
 def hf_model(hf_runner):
-    with hf_runner(MODEL_NAME, dtype=DTYPE,
-                   is_sentence_transformer=True) as hf_model:
+    with hf_runner(MODEL_NAME, dtype=DTYPE, is_sentence_transformer=True) as hf_model:
         yield hf_model
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_single_embedding(hf_model, client: openai.AsyncOpenAI,
-                                model_name: str):
+async def test_single_embedding(hf_model, client: openai.AsyncOpenAI, model_name: str):
     input_texts = [
         "The chef prepared a delicious meal.",
     ]
@@ -70,7 +67,8 @@ async def test_single_embedding(hf_model, client: openai.AsyncOpenAI,
         encoding_format="float",
     )
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
 
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -90,7 +88,8 @@ async def test_single_embedding(hf_model, client: openai.AsyncOpenAI,
         encoding_format="float",
     )
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
 
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -102,12 +101,12 @@ async def test_single_embedding(hf_model, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI,
-                               model_name: str):
+async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI, model_name: str):
     # test list[str]
     input_texts = [
-        "The cat sat on the mat.", "A feline was resting on a rug.",
-        "Stars twinkle brightly in the night sky."
+        "The cat sat on the mat.",
+        "A feline was resting on a rug.",
+        "Stars twinkle brightly in the night sky.",
     ]
     embedding_response = await client.embeddings.create(
         model=model_name,
@@ -115,7 +114,8 @@ async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI,
         encoding_format="float",
     )
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
 
     assert embeddings.id is not None
     assert len(embeddings.data) == 3
@@ -128,15 +128,20 @@ async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI,
     run_embedding_correctness_test(hf_model, input_texts, vllm_outputs)
 
     # test list[list[int]]
-    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
-                    [25, 32, 64, 77]]
+    input_tokens = [
+        [4, 5, 7, 9, 20],
+        [15, 29, 499],
+        [24, 24, 24, 24, 24],
+        [25, 32, 64, 77],
+    ]
     embedding_response = await client.embeddings.create(
         model=model_name,
         input=input_tokens,
         encoding_format="float",
     )
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
 
     assert embeddings.id is not None
     assert len(embeddings.data) == 4
@@ -148,19 +153,23 @@ async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_conversation_embedding(server: RemoteOpenAIServer,
-                                      client: openai.AsyncOpenAI,
-                                      model_name: str):
-    messages = [{
-        "role": "user",
-        "content": "The cat sat on the mat.",
-    }, {
-        "role": "assistant",
-        "content": "A feline was resting on a rug.",
-    }, {
-        "role": "user",
-        "content": "Stars twinkle brightly in the night sky.",
-    }]
+async def test_conversation_embedding(
+    server: RemoteOpenAIServer, client: openai.AsyncOpenAI, model_name: str
+):
+    messages = [
+        {
+            "role": "user",
+            "content": "The cat sat on the mat.",
+        },
+        {
+            "role": "assistant",
+            "content": "A feline was resting on a rug.",
+        },
+        {
+            "role": "user",
+            "content": "Stars twinkle brightly in the night sky.",
+        },
+    ]
 
     chat_response = requests.post(
         server.url_for("v1/embeddings"),
@@ -189,64 +198,66 @@ async def test_conversation_embedding(server: RemoteOpenAIServer,
         extra_body={"add_special_tokens": False},
     )
     completion_embeddings = EmbeddingResponse.model_validate(
-        completion_response.model_dump(mode="json"))
+        completion_response.model_dump(mode="json")
+    )
 
     assert chat_embeddings.id is not None
     assert completion_embeddings.id is not None
     assert chat_embeddings.created <= completion_embeddings.created
-    assert chat_embeddings.model_dump(
-        exclude={"id", "created"}) == (completion_embeddings.model_dump(
-            exclude={"id", "created"}))
+    assert chat_embeddings.model_dump(exclude={"id", "created"}) == (
+        completion_embeddings.model_dump(exclude={"id", "created"})
+    )
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_batch_base64_embedding(hf_model, client: openai.AsyncOpenAI,
-                                      model_name: str):
+async def test_batch_base64_embedding(
+    hf_model, client: openai.AsyncOpenAI, model_name: str
+):
     input_texts = [
         "Hello my name is",
-        "The best thing about vLLM is that it supports many different models"
+        "The best thing about vLLM is that it supports many different models",
     ]
 
-    responses_float = await client.embeddings.create(input=input_texts,
-                                                     model=model_name,
-                                                     encoding_format="float")
+    responses_float = await client.embeddings.create(
+        input=input_texts, model=model_name, encoding_format="float"
+    )
     float_data = [d.embedding for d in responses_float.data]
     run_embedding_correctness_test(hf_model, input_texts, float_data)
 
-    responses_base64 = await client.embeddings.create(input=input_texts,
-                                                      model=model_name,
-                                                      encoding_format="base64")
+    responses_base64 = await client.embeddings.create(
+        input=input_texts, model=model_name, encoding_format="base64"
+    )
     base64_data = []
     for data in responses_base64.data:
         base64_data.append(
-            np.frombuffer(base64.b64decode(data.embedding),
-                          dtype="float32").tolist())
+            np.frombuffer(base64.b64decode(data.embedding), dtype="float32").tolist()
+        )
 
     run_embedding_correctness_test(hf_model, input_texts, base64_data)
 
     # Default response is float32 decoded from base64 by OpenAI Client
-    responses_default = await client.embeddings.create(input=input_texts,
-                                                       model=model_name)
+    responses_default = await client.embeddings.create(
+        input=input_texts, model=model_name
+    )
     default_data = [d.embedding for d in responses_default.data]
     run_embedding_correctness_test(hf_model, input_texts, default_data)
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_single_embedding_truncation(client: openai.AsyncOpenAI,
-                                           model_name: str):
+async def test_single_embedding_truncation(client: openai.AsyncOpenAI, model_name: str):
     input_texts = [
         "Como o Brasil pode fomentar o desenvolvimento de modelos de IA?",
     ]
 
     # test single embedding
     embedding_response = await client.embeddings.create(
-        model=model_name,
-        input=input_texts,
-        extra_body={"truncate_prompt_tokens": 10})
+        model=model_name, input=input_texts, extra_body={"truncate_prompt_tokens": 10}
+    )
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
 
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -256,15 +267,34 @@ async def test_single_embedding_truncation(client: openai.AsyncOpenAI,
     assert embeddings.usage.total_tokens == 10
 
     input_tokens = [
-        1, 24428, 289, 18341, 26165, 285, 19323, 283, 289, 26789, 3871, 28728,
-        9901, 340, 2229, 385, 340, 315, 28741, 28804, 2
+        1,
+        24428,
+        289,
+        18341,
+        26165,
+        285,
+        19323,
+        283,
+        289,
+        26789,
+        3871,
+        28728,
+        9901,
+        340,
+        2229,
+        385,
+        340,
+        315,
+        28741,
+        28804,
+        2,
     ]
     embedding_response = await client.embeddings.create(
-        model=model_name,
-        input=input_tokens,
-        extra_body={"truncate_prompt_tokens": 10})
+        model=model_name, input=input_tokens, extra_body={"truncate_prompt_tokens": 10}
+    )
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
 
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -276,8 +306,9 @@ async def test_single_embedding_truncation(client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_single_embedding_truncation_invalid(client: openai.AsyncOpenAI,
-                                                   model_name: str):
+async def test_single_embedding_truncation_invalid(
+    client: openai.AsyncOpenAI, model_name: str
+):
     input_texts = [
         "Como o Brasil pode fomentar o desenvolvimento de modelos de IA?",
     ]
@@ -286,15 +317,17 @@ async def test_single_embedding_truncation_invalid(client: openai.AsyncOpenAI,
     response = await client.embeddings.create(
         model=model_name,
         input=input_texts,
-        extra_body={"truncate_prompt_tokens": 8193})
+        extra_body={"truncate_prompt_tokens": 8193},
+    )
     assert "error" in response.object
-    assert "truncate_prompt_tokens value is greater than max_model_len. "\
-        "Please, select a smaller truncation size." in response.message
+    assert (
+        "truncate_prompt_tokens value is greater than max_model_len. "
+        "Please, select a smaller truncation size." in response.message
+    )
 
 
 @pytest.mark.asyncio
-async def test_invocations(server: RemoteOpenAIServer,
-                           client: openai.AsyncOpenAI):
+async def test_invocations(server: RemoteOpenAIServer, client: openai.AsyncOpenAI):
     input_texts = [
         "The chef prepared a delicious meal.",
     ]
@@ -307,35 +340,43 @@ async def test_invocations(server: RemoteOpenAIServer,
 
     completion_response = await client.embeddings.create(**request_args)
 
-    invocation_response = requests.post(server.url_for("invocations"),
-                                        json=request_args)
+    invocation_response = requests.post(
+        server.url_for("invocations"), json=request_args
+    )
     invocation_response.raise_for_status()
 
     completion_output = completion_response.model_dump()
     invocation_output = invocation_response.json()
 
     assert completion_output.keys() == invocation_output.keys()
-    for completion_data, invocation_data in zip(completion_output["data"],
-                                                invocation_output["data"]):
+    for completion_data, invocation_data in zip(
+        completion_output["data"], invocation_output["data"]
+    ):
         assert completion_data.keys() == invocation_data.keys()
-        check_embeddings_close(embeddings_0_lst=[completion_data["embedding"]],
-                               embeddings_1_lst=[invocation_data["embedding"]],
-                               name_0="completion",
-                               name_1="invocation")
+        check_embeddings_close(
+            embeddings_0_lst=[completion_data["embedding"]],
+            embeddings_1_lst=[invocation_data["embedding"]],
+            name_0="completion",
+            name_1="invocation",
+        )
 
 
 @pytest.mark.asyncio
 async def test_invocations_conversation(server: RemoteOpenAIServer):
-    messages = [{
-        "role": "user",
-        "content": "The cat sat on the mat.",
-    }, {
-        "role": "assistant",
-        "content": "A feline was resting on a rug.",
-    }, {
-        "role": "user",
-        "content": "Stars twinkle brightly in the night sky.",
-    }]
+    messages = [
+        {
+            "role": "user",
+            "content": "The cat sat on the mat.",
+        },
+        {
+            "role": "assistant",
+            "content": "A feline was resting on a rug.",
+        },
+        {
+            "role": "user",
+            "content": "Stars twinkle brightly in the night sky.",
+        },
+    ]
 
     request_args = {
         "model": MODEL_NAME,
@@ -343,25 +384,28 @@ async def test_invocations_conversation(server: RemoteOpenAIServer):
         "encoding_format": "float",
     }
 
-    chat_response = requests.post(server.url_for("v1/embeddings"),
-                                  json=request_args)
+    chat_response = requests.post(server.url_for("v1/embeddings"), json=request_args)
     chat_response.raise_for_status()
 
-    invocation_response = requests.post(server.url_for("invocations"),
-                                        json=request_args)
+    invocation_response = requests.post(
+        server.url_for("invocations"), json=request_args
+    )
     invocation_response.raise_for_status()
 
     chat_output = chat_response.json()
     invocation_output = invocation_response.json()
 
     assert chat_output.keys() == invocation_output.keys()
-    for chat_data, invocation_data in zip(chat_output["data"],
-                                          invocation_output["data"]):
+    for chat_data, invocation_data in zip(
+        chat_output["data"], invocation_output["data"]
+    ):
         assert chat_data.keys() == invocation_data.keys()
-        check_embeddings_close(embeddings_0_lst=[chat_data["embedding"]],
-                               embeddings_1_lst=[invocation_data["embedding"]],
-                               name_0="chat",
-                               name_1="invocation")
+        check_embeddings_close(
+            embeddings_0_lst=[chat_data["embedding"]],
+            embeddings_1_lst=[invocation_data["embedding"]],
+            name_0="chat",
+            name_1="invocation",
+        )
 
 
 @pytest.mark.asyncio
@@ -374,23 +418,22 @@ async def test_normalize(server: RemoteOpenAIServer, model_name: str):
             "model": MODEL_NAME,
             "input": input_text,
             "encoding_format": "float",
-            "normalize": normalize
+            "normalize": normalize,
         }
 
-        response = requests.post(server.url_for("v1/embeddings"),
-                                 json=request_args)
+        response = requests.post(server.url_for("v1/embeddings"), json=request_args)
         outputs = response.json()
 
-        return torch.tensor([x['embedding'] for x in outputs["data"]])
+        return torch.tensor([x["embedding"] for x in outputs["data"]])
 
     default = await get_outputs(normalize=None)
     w_normal = await get_outputs(normalize=True)
     wo_normal = await get_outputs(normalize=False)
 
-    assert torch.allclose(default, w_normal,
-                          atol=1e-2), "Default should use normal."
-    assert not torch.allclose(w_normal, wo_normal,
-                              atol=1e-2), "wo_normal should not use normal."
-    assert torch.allclose(
-        w_normal, F.normalize(wo_normal, p=2, dim=-1),
-        atol=1e-2), "w_normal should be close to normal(wo_normal)."
+    assert torch.allclose(default, w_normal, atol=1e-2), "Default should use normal."
+    assert not torch.allclose(w_normal, wo_normal, atol=1e-2), (
+        "wo_normal should not use normal."
+    )
+    assert torch.allclose(w_normal, F.normalize(wo_normal, p=2, dim=-1), atol=1e-2), (
+        "w_normal should be close to normal(wo_normal)."
+    )