Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-10-05 15:06:22 +01:00 (committed by GitHub)
Parent: 17edd8a807
Commit: d6953beb91

1508 changed files with 115244 additions and 94146 deletions
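In practice, a yapf + isort to ruff conversion is mostly a configuration swap followed by a mechanical re-run of the formatter; the diff below is that re-run. As a rough sketch of the configuration side, assuming a standard pyproject.toml setup (the section names and options below are real ruff settings, but the exact rules and values chosen for this commit are assumptions, not taken from this page):

```toml
# Hypothetical pyproject.toml excerpt -- illustrative only, not the exact
# configuration adopted by this commit. The old [tool.yapf] and
# [tool.isort] sections are deleted and replaced with:

[tool.ruff]
line-length = 88          # ruff format's default, black-compatible

[tool.ruff.lint]
# "I" enables ruff's isort-compatible import-sorting rules, which is
# what lets ruff replace the standalone isort tool.
select = ["E", "F", "I"]

[tool.ruff.format]
# ruff format itself replaces yapf; it is a black-style code formatter.
docstring-code-format = true
```

With the `"I"` rules selected, `ruff check --fix` reproduces isort's behavior, and `ruff format` supplies the black-style layout seen throughout the hunks below.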


@@ -31,7 +31,6 @@ def _generate_random_text(word_count: int) -> str:
         "that",
         "these",
         "those",
-
         # Action verbs
         "create",
         "build",
@@ -80,7 +79,6 @@ def _generate_random_text(word_count: int) -> str:
         "finish",
         "deliver",
         "provide",
-
         # Technology and science nouns
         "system",
         "application",
@@ -132,7 +130,6 @@ def _generate_random_text(word_count: int) -> str:
         "optimization",
         "performance",
         "efficiency",
-
         # General nouns
         "project",
         "team",
@@ -175,7 +172,7 @@ def _generate_random_text(word_count: int) -> str:
         "session",
         "meeting",
         "discussion",
-        "decision"
+        "decision",
     ]
 
     words = []
@@ -189,7 +186,7 @@ def _generate_random_text(word_count: int) -> str:
     result = []
     for i, word in enumerate(words_list):
         result.append(word)
-        if ((i + 1) % random.randint(10, 20) == 0 and i < len(words_list) - 1):
+        if (i + 1) % random.randint(10, 20) == 0 and i < len(words_list) - 1:
             result[-1] += "."
 
     return " ".join(result)
@@ -216,9 +213,11 @@ def server_with_chunked_processing():
         "--enforce-eager",
         "--max-model-len",
         "512",  # Set smaller max_model_len to trigger chunking mechanism
-        '--pooler-config',
-        ('{"pooling_type": "MEAN", "normalize": true, '
-         '"enable_chunked_processing": true, "max_embed_len": 10000}'),
+        "--pooler-config",
+        (
+            '{"pooling_type": "MEAN", "normalize": true, '
+            '"enable_chunked_processing": true, "max_embed_len": 10000}'
+        ),
         "--gpu-memory-utilization",
         "0.8",
     ]
@@ -230,23 +229,22 @@ def server_with_chunked_processing():
 @pytest_asyncio.fixture
 async def client_with_chunked_processing(server_with_chunked_processing):
     """Create async client with chunking processing support."""
-    async with server_with_chunked_processing.get_async_client(
-    ) as async_client:
+    async with server_with_chunked_processing.get_async_client() as async_client:
         yield async_client
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_long_text_embedding_1500_chars(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
-    """Test embedding processing for ~1500 character long text
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
+    """Test embedding processing for ~1500 character long text
     (~1028 tokens, exceeding 512 token limit)."""
-    # Verify text length
+    # Verify text has sufficient word count (approximately 1500 words)
     word_count = len(LONG_TEXT_1500_WORDS.split())
-    assert word_count >= 1400, (
-        f"Test text word count insufficient: {word_count} words")
+    assert word_count >= 1400, f"Test text word count insufficient: {word_count} words"
 
     # Send embedding request
     embedding_response = await client_with_chunked_processing.embeddings.create(
@@ -257,12 +255,14 @@ async def test_long_text_embedding_1500_chars(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding
-               ) == 384  # multilingual-e5-small embedding dimension
+    assert (
+        len(embeddings.data[0].embedding) == 384
+    )  # multilingual-e5-small embedding dimension
     assert embeddings.usage.completion_tokens == 0
     # Due to chunked processing, token count should
     # reflect actual processed tokens
@@ -274,26 +274,26 @@ async def test_long_text_embedding_1500_chars(
 
     # Verify embedding vector validity
     embedding_vector = embeddings.data[0].embedding
-    assert all(
-        isinstance(x, float)
-        for x in embedding_vector), "Embedding vector should contain floats"
-    assert not all(
-        x == 0
-        for x in embedding_vector), "Embedding vector should not be all zeros"
+    assert all(isinstance(x, float) for x in embedding_vector), (
+        "Embedding vector should contain floats"
+    )
+    assert not all(x == 0 for x in embedding_vector), (
+        "Embedding vector should not be all zeros"
+    )
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_long_text_embedding_2500_chars(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test embedding processing for ~2500 character long text
     (~2048 tokens, requiring multiple chunks)."""
-    # Verify text length
+    # Verify text has sufficient word count (approximately 2500 words)
     word_count = len(LONG_TEXT_2500_WORDS.split())
-    assert word_count >= 2300, (
-        f"Test text word count insufficient: {word_count} words")
+    assert word_count >= 2300, f"Test text word count insufficient: {word_count} words"
 
     # Send embedding request
     embedding_response = await client_with_chunked_processing.embeddings.create(
@@ -304,12 +304,14 @@ async def test_long_text_embedding_2500_chars(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding
-               ) == 384  # multilingual-e5-small embedding dimension
+    assert (
+        len(embeddings.data[0].embedding) == 384
+    )  # multilingual-e5-small embedding dimension
     assert embeddings.usage.completion_tokens == 0
     # Due to chunked processing, token count should
     # reflect actual processed tokens
@@ -321,18 +323,19 @@ async def test_long_text_embedding_2500_chars(
 
     # Verify embedding vector validity
     embedding_vector = embeddings.data[0].embedding
-    assert all(
-        isinstance(x, float)
-        for x in embedding_vector), "Embedding vector should contain floats"
-    assert not all(
-        x == 0
-        for x in embedding_vector), "Embedding vector should not be all zeros"
+    assert all(isinstance(x, float) for x in embedding_vector), (
+        "Embedding vector should contain floats"
+    )
+    assert not all(x == 0 for x in embedding_vector), (
+        "Embedding vector should not be all zeros"
+    )
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_batch_long_text_embedding(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test batch long text embedding processing."""
 
     input_texts = [
@@ -350,7 +353,8 @@ async def test_batch_long_text_embedding(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 3  # Three input texts
@@ -375,13 +379,16 @@
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_chunked_vs_normal_consistency(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test consistency between chunked and
     normal processing (using short text)."""
 
     # Use a short text within the 512 token limit
-    short_text = ("Artificial intelligence technology is changing our world, "
-                  "bringing unprecedented opportunities and challenges.")
+    short_text = (
+        "Artificial intelligence technology is changing our world, "
+        "bringing unprecedented opportunities and challenges."
+    )
 
     # Send embedding request
     embedding_response = await client_with_chunked_processing.embeddings.create(
@@ -392,7 +399,8 @@ async def test_chunked_vs_normal_consistency(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -411,7 +419,8 @@
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_chunked_processing_response_format(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test response format and structure during chunked processing."""
 
     # Test with long text to trigger chunking
@@ -423,7 +432,8 @@ async def test_chunked_processing_response_format(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -433,8 +443,10 @@
     # Verify embedding vector properties
     embedding_vector = embeddings.data[0].embedding
     import math
+
     vector_norm = math.sqrt(sum(x * x for x in embedding_vector))
     # Check that the vector is normalized
     # (default behavior for most embedding models)
     assert 0.8 < vector_norm < 1.2, (
-        f"Vector norm should be reasonable, actual: {vector_norm}")
+        f"Vector norm should be reasonable, actual: {vector_norm}"
+    )
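A sweep touching 1508 files is generated mechanically rather than by hand. Assuming configuration along the lines sketched above, the standard ruff CLI invocations would be as follows (these are real ruff commands, though the exact invocation used for this commit is not shown here):

```sh
# Rewrite every Python file in place with ruff's black-style formatter
# (replaces yapf).
ruff format .

# Sort imports via the isort-compatible "I" rules and apply any other
# auto-fixable lint rules (replaces isort).
ruff check --fix .
```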