Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-10-05 15:06:22 +01:00 (committed by GitHub)
Parent: 17edd8a807
Commit: d6953beb91

1508 changed files with 115244 additions and 94146 deletions
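In practice, a yapf + isort to ruff conversion is mostly a configuration swap followed by a mechanical re-run of the formatter; the diff below is that re-run. As a rough sketch of the configuration side, assuming a standard pyproject.toml setup (the section names and options below are real ruff settings, but the exact rules and values chosen for this commit are assumptions, not taken from this page):

```toml
# Hypothetical pyproject.toml excerpt -- illustrative only, not the exact
# configuration adopted by this commit. The old [tool.yapf] and
# [tool.isort] sections are deleted and replaced with:

[tool.ruff]
line-length = 88          # ruff format's default, black-compatible

[tool.ruff.lint]
# "I" enables ruff's isort-compatible import-sorting rules, which is
# what lets ruff replace the standalone isort tool.
select = ["E", "F", "I"]

[tool.ruff.format]
# ruff format itself replaces yapf; it is a black-style code formatter.
docstring-code-format = true
```

With the `"I"` rules selected, `ruff check --fix` reproduces isort's behavior, and `ruff format` supplies the black-style layout seen throughout the hunks below.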


@@ -31,7 +31,6 @@ def _generate_random_text(word_count: int) -> str:
         "that",
         "these",
         "those",
-
         # Action verbs
         "create",
         "build",
@@ -80,7 +79,6 @@ def _generate_random_text(word_count: int) -> str:
         "finish",
         "deliver",
         "provide",
-
         # Technology and science nouns
         "system",
         "application",
@@ -132,7 +130,6 @@ def _generate_random_text(word_count: int) -> str:
         "optimization",
         "performance",
         "efficiency",
-
         # General nouns
         "project",
         "team",
@@ -175,7 +172,7 @@ def _generate_random_text(word_count: int) -> str:
         "session",
         "meeting",
         "discussion",
-        "decision"
+        "decision",
     ]
 
     words = []
@@ -189,7 +186,7 @@ def _generate_random_text(word_count: int) -> str:
     result = []
     for i, word in enumerate(words_list):
         result.append(word)
-        if ((i + 1) % random.randint(10, 20) == 0 and i < len(words_list) - 1):
+        if (i + 1) % random.randint(10, 20) == 0 and i < len(words_list) - 1:
             result[-1] += "."
 
     return " ".join(result)
@@ -216,9 +213,11 @@ def server_with_chunked_processing():
         "--enforce-eager",
         "--max-model-len",
         "512",  # Set smaller max_model_len to trigger chunking mechanism
-        '--pooler-config',
-        ('{"pooling_type": "MEAN", "normalize": true, '
-         '"enable_chunked_processing": true, "max_embed_len": 10000}'),
+        "--pooler-config",
+        (
+            '{"pooling_type": "MEAN", "normalize": true, '
+            '"enable_chunked_processing": true, "max_embed_len": 10000}'
+        ),
         "--gpu-memory-utilization",
         "0.8",
     ]
@@ -230,23 +229,22 @@ def server_with_chunked_processing():
 @pytest_asyncio.fixture
 async def client_with_chunked_processing(server_with_chunked_processing):
     """Create async client with chunking processing support."""
-    async with server_with_chunked_processing.get_async_client(
-    ) as async_client:
+    async with server_with_chunked_processing.get_async_client() as async_client:
         yield async_client
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_long_text_embedding_1500_chars(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
-    """Test embedding processing for ~1500 character long text
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
+    """Test embedding processing for ~1500 character long text
     (~1028 tokens, exceeding 512 token limit)."""
-    # Verify text length
+    # Verify text has sufficient word count (approximately 1500 words)
     word_count = len(LONG_TEXT_1500_WORDS.split())
-    assert word_count >= 1400, (
-        f"Test text word count insufficient: {word_count} words")
+    assert word_count >= 1400, f"Test text word count insufficient: {word_count} words"
 
     # Send embedding request
     embedding_response = await client_with_chunked_processing.embeddings.create(
@@ -257,12 +255,14 @@ async def test_long_text_embedding_1500_chars(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding
-               ) == 384  # multilingual-e5-small embedding dimension
+    assert (
+        len(embeddings.data[0].embedding) == 384
+    )  # multilingual-e5-small embedding dimension
     assert embeddings.usage.completion_tokens == 0
     # Due to chunked processing, token count should
     # reflect actual processed tokens
@@ -274,26 +274,26 @@ async def test_long_text_embedding_1500_chars(
 
     # Verify embedding vector validity
     embedding_vector = embeddings.data[0].embedding
-    assert all(
-        isinstance(x, float)
-        for x in embedding_vector), "Embedding vector should contain floats"
-    assert not all(
-        x == 0
-        for x in embedding_vector), "Embedding vector should not be all zeros"
+    assert all(isinstance(x, float) for x in embedding_vector), (
+        "Embedding vector should contain floats"
+    )
+    assert not all(x == 0 for x in embedding_vector), (
+        "Embedding vector should not be all zeros"
+    )
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_long_text_embedding_2500_chars(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test embedding processing for ~2500 character long text
     (~2048 tokens, requiring multiple chunks)."""
-    # Verify text length
+    # Verify text has sufficient word count (approximately 2500 words)
     word_count = len(LONG_TEXT_2500_WORDS.split())
-    assert word_count >= 2300, (
-        f"Test text word count insufficient: {word_count} words")
+    assert word_count >= 2300, f"Test text word count insufficient: {word_count} words"
 
     # Send embedding request
     embedding_response = await client_with_chunked_processing.embeddings.create(
@@ -304,12 +304,14 @@ async def test_long_text_embedding_2500_chars(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding
-               ) == 384  # multilingual-e5-small embedding dimension
+    assert (
+        len(embeddings.data[0].embedding) == 384
+    )  # multilingual-e5-small embedding dimension
     assert embeddings.usage.completion_tokens == 0
     # Due to chunked processing, token count should
     # reflect actual processed tokens
@@ -321,18 +323,19 @@ async def test_long_text_embedding_2500_chars(
 
     # Verify embedding vector validity
     embedding_vector = embeddings.data[0].embedding
-    assert all(
-        isinstance(x, float)
-        for x in embedding_vector), "Embedding vector should contain floats"
-    assert not all(
-        x == 0
-        for x in embedding_vector), "Embedding vector should not be all zeros"
+    assert all(isinstance(x, float) for x in embedding_vector), (
+        "Embedding vector should contain floats"
+    )
+    assert not all(x == 0 for x in embedding_vector), (
+        "Embedding vector should not be all zeros"
+    )
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_batch_long_text_embedding(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test batch long text embedding processing."""
 
     input_texts = [
@@ -350,7 +353,8 @@ async def test_batch_long_text_embedding(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 3  # Three input texts
@@ -375,13 +379,16 @@
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_chunked_vs_normal_consistency(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test consistency between chunked and
     normal processing (using short text)."""
 
     # Use a short text within the 512 token limit
-    short_text = ("Artificial intelligence technology is changing our world, "
-                  "bringing unprecedented opportunities and challenges.")
+    short_text = (
+        "Artificial intelligence technology is changing our world, "
+        "bringing unprecedented opportunities and challenges."
+    )
 
     # Send embedding request
     embedding_response = await client_with_chunked_processing.embeddings.create(
@@ -392,7 +399,8 @@ async def test_chunked_vs_normal_consistency(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -411,7 +419,8 @@
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_chunked_processing_response_format(
-        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
+    client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
+):
     """Test response format and structure during chunked processing."""
 
     # Test with long text to trigger chunking
@@ -423,7 +432,8 @@ async def test_chunked_processing_response_format(
 
     # Verify response structure
     embeddings = EmbeddingResponse.model_validate(
-        embedding_response.model_dump(mode="json"))
+        embedding_response.model_dump(mode="json")
+    )
     assert embeddings.id is not None
     assert len(embeddings.data) == 1
@@ -433,8 +443,10 @@
     # Verify embedding vector properties
     embedding_vector = embeddings.data[0].embedding
     import math
+
     vector_norm = math.sqrt(sum(x * x for x in embedding_vector))
     # Check that the vector is normalized
     # (default behavior for most embedding models)
     assert 0.8 < vector_norm < 1.2, (
-        f"Vector norm should be reasonable, actual: {vector_norm}")
+        f"Vector norm should be reasonable, actual: {vector_norm}"
+    )
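A sweep touching 1508 files is generated mechanically rather than by hand. Assuming configuration along the lines sketched above, the standard ruff CLI invocations would be as follows (these are real ruff commands, though the exact invocation used for this commit is not shown here):

```sh
# Rewrite every Python file in place with ruff's black-style formatter
# (replaces yapf).
ruff format .

# Sort imports via the isort-compatible "I" rules and apply any other
# auto-fixable lint rules (replaces isort).
ruff check --fix .
```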