Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -31,7 +31,6 @@ def _generate_random_text(word_count: int) -> str:
|
||||
"that",
|
||||
"these",
|
||||
"those",
|
||||
|
||||
# Action verbs
|
||||
"create",
|
||||
"build",
|
||||
@@ -80,7 +79,6 @@ def _generate_random_text(word_count: int) -> str:
|
||||
"finish",
|
||||
"deliver",
|
||||
"provide",
|
||||
|
||||
# Technology and science nouns
|
||||
"system",
|
||||
"application",
|
||||
@@ -132,7 +130,6 @@ def _generate_random_text(word_count: int) -> str:
|
||||
"optimization",
|
||||
"performance",
|
||||
"efficiency",
|
||||
|
||||
# General nouns
|
||||
"project",
|
||||
"team",
|
||||
@@ -175,7 +172,7 @@ def _generate_random_text(word_count: int) -> str:
|
||||
"session",
|
||||
"meeting",
|
||||
"discussion",
|
||||
"decision"
|
||||
"decision",
|
||||
]
|
||||
|
||||
words = []
|
||||
@@ -189,7 +186,7 @@ def _generate_random_text(word_count: int) -> str:
|
||||
result = []
|
||||
for i, word in enumerate(words_list):
|
||||
result.append(word)
|
||||
if ((i + 1) % random.randint(10, 20) == 0 and i < len(words_list) - 1):
|
||||
if (i + 1) % random.randint(10, 20) == 0 and i < len(words_list) - 1:
|
||||
result[-1] += "."
|
||||
|
||||
return " ".join(result)
|
||||
@@ -216,9 +213,11 @@ def server_with_chunked_processing():
|
||||
"--enforce-eager",
|
||||
"--max-model-len",
|
||||
"512", # Set smaller max_model_len to trigger chunking mechanism
|
||||
'--pooler-config',
|
||||
('{"pooling_type": "MEAN", "normalize": true, '
|
||||
'"enable_chunked_processing": true, "max_embed_len": 10000}'),
|
||||
"--pooler-config",
|
||||
(
|
||||
'{"pooling_type": "MEAN", "normalize": true, '
|
||||
'"enable_chunked_processing": true, "max_embed_len": 10000}'
|
||||
),
|
||||
"--gpu-memory-utilization",
|
||||
"0.8",
|
||||
]
|
||||
@@ -230,23 +229,22 @@ def server_with_chunked_processing():
|
||||
@pytest_asyncio.fixture
|
||||
async def client_with_chunked_processing(server_with_chunked_processing):
|
||||
"""Create async client with chunking processing support."""
|
||||
async with server_with_chunked_processing.get_async_client(
|
||||
) as async_client:
|
||||
async with server_with_chunked_processing.get_async_client() as async_client:
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_long_text_embedding_1500_chars(
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
|
||||
"""Test embedding processing for ~1500 character long text
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
|
||||
):
|
||||
"""Test embedding processing for ~1500 character long text
|
||||
(~1028 tokens, exceeding 512 token limit)."""
|
||||
|
||||
# Verify text length
|
||||
# Verify text has sufficient word count (approximately 1500 words)
|
||||
word_count = len(LONG_TEXT_1500_WORDS.split())
|
||||
assert word_count >= 1400, (
|
||||
f"Test text word count insufficient: {word_count} words")
|
||||
assert word_count >= 1400, f"Test text word count insufficient: {word_count} words"
|
||||
|
||||
# Send embedding request
|
||||
embedding_response = await client_with_chunked_processing.embeddings.create(
|
||||
@@ -257,12 +255,14 @@ async def test_long_text_embedding_1500_chars(
|
||||
|
||||
# Verify response structure
|
||||
embeddings = EmbeddingResponse.model_validate(
|
||||
embedding_response.model_dump(mode="json"))
|
||||
embedding_response.model_dump(mode="json")
|
||||
)
|
||||
|
||||
assert embeddings.id is not None
|
||||
assert len(embeddings.data) == 1
|
||||
assert len(embeddings.data[0].embedding
|
||||
) == 384 # multilingual-e5-small embedding dimension
|
||||
assert (
|
||||
len(embeddings.data[0].embedding) == 384
|
||||
) # multilingual-e5-small embedding dimension
|
||||
assert embeddings.usage.completion_tokens == 0
|
||||
# Due to chunked processing, token count should
|
||||
# reflect actual processed tokens
|
||||
@@ -274,26 +274,26 @@ async def test_long_text_embedding_1500_chars(
|
||||
|
||||
# Verify embedding vector validity
|
||||
embedding_vector = embeddings.data[0].embedding
|
||||
assert all(
|
||||
isinstance(x, float)
|
||||
for x in embedding_vector), "Embedding vector should contain floats"
|
||||
assert not all(
|
||||
x == 0
|
||||
for x in embedding_vector), "Embedding vector should not be all zeros"
|
||||
assert all(isinstance(x, float) for x in embedding_vector), (
|
||||
"Embedding vector should contain floats"
|
||||
)
|
||||
assert not all(x == 0 for x in embedding_vector), (
|
||||
"Embedding vector should not be all zeros"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_long_text_embedding_2500_chars(
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
|
||||
):
|
||||
"""Test embedding processing for ~2500 character long text
|
||||
(~2048 tokens, requiring multiple chunks)."""
|
||||
|
||||
# Verify text length
|
||||
# Verify text has sufficient word count (approximately 2500 words)
|
||||
word_count = len(LONG_TEXT_2500_WORDS.split())
|
||||
assert word_count >= 2300, (
|
||||
f"Test text word count insufficient: {word_count} words")
|
||||
assert word_count >= 2300, f"Test text word count insufficient: {word_count} words"
|
||||
|
||||
# Send embedding request
|
||||
embedding_response = await client_with_chunked_processing.embeddings.create(
|
||||
@@ -304,12 +304,14 @@ async def test_long_text_embedding_2500_chars(
|
||||
|
||||
# Verify response structure
|
||||
embeddings = EmbeddingResponse.model_validate(
|
||||
embedding_response.model_dump(mode="json"))
|
||||
embedding_response.model_dump(mode="json")
|
||||
)
|
||||
|
||||
assert embeddings.id is not None
|
||||
assert len(embeddings.data) == 1
|
||||
assert len(embeddings.data[0].embedding
|
||||
) == 384 # multilingual-e5-small embedding dimension
|
||||
assert (
|
||||
len(embeddings.data[0].embedding) == 384
|
||||
) # multilingual-e5-small embedding dimension
|
||||
assert embeddings.usage.completion_tokens == 0
|
||||
# Due to chunked processing, token count should
|
||||
# reflect actual processed tokens
|
||||
@@ -321,18 +323,19 @@ async def test_long_text_embedding_2500_chars(
|
||||
|
||||
# Verify embedding vector validity
|
||||
embedding_vector = embeddings.data[0].embedding
|
||||
assert all(
|
||||
isinstance(x, float)
|
||||
for x in embedding_vector), "Embedding vector should contain floats"
|
||||
assert not all(
|
||||
x == 0
|
||||
for x in embedding_vector), "Embedding vector should not be all zeros"
|
||||
assert all(isinstance(x, float) for x in embedding_vector), (
|
||||
"Embedding vector should contain floats"
|
||||
)
|
||||
assert not all(x == 0 for x in embedding_vector), (
|
||||
"Embedding vector should not be all zeros"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_batch_long_text_embedding(
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
|
||||
):
|
||||
"""Test batch long text embedding processing."""
|
||||
|
||||
input_texts = [
|
||||
@@ -350,7 +353,8 @@ async def test_batch_long_text_embedding(
|
||||
|
||||
# Verify response structure
|
||||
embeddings = EmbeddingResponse.model_validate(
|
||||
embedding_response.model_dump(mode="json"))
|
||||
embedding_response.model_dump(mode="json")
|
||||
)
|
||||
|
||||
assert embeddings.id is not None
|
||||
assert len(embeddings.data) == 3 # Three input texts
|
||||
@@ -375,13 +379,16 @@ async def test_batch_long_text_embedding(
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_chunked_vs_normal_consistency(
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
|
||||
):
|
||||
"""Test consistency between chunked and
|
||||
normal processing (using short text)."""
|
||||
|
||||
# Use a short text within the 512 token limit
|
||||
short_text = ("Artificial intelligence technology is changing our world, "
|
||||
"bringing unprecedented opportunities and challenges.")
|
||||
short_text = (
|
||||
"Artificial intelligence technology is changing our world, "
|
||||
"bringing unprecedented opportunities and challenges."
|
||||
)
|
||||
|
||||
# Send embedding request
|
||||
embedding_response = await client_with_chunked_processing.embeddings.create(
|
||||
@@ -392,7 +399,8 @@ async def test_chunked_vs_normal_consistency(
|
||||
|
||||
# Verify response structure
|
||||
embeddings = EmbeddingResponse.model_validate(
|
||||
embedding_response.model_dump(mode="json"))
|
||||
embedding_response.model_dump(mode="json")
|
||||
)
|
||||
|
||||
assert embeddings.id is not None
|
||||
assert len(embeddings.data) == 1
|
||||
@@ -411,7 +419,8 @@ async def test_chunked_vs_normal_consistency(
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_chunked_processing_response_format(
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
|
||||
client_with_chunked_processing: openai.AsyncOpenAI, model_name: str
|
||||
):
|
||||
"""Test response format and structure during chunked processing."""
|
||||
|
||||
# Test with long text to trigger chunking
|
||||
@@ -423,7 +432,8 @@ async def test_chunked_processing_response_format(
|
||||
|
||||
# Verify response structure
|
||||
embeddings = EmbeddingResponse.model_validate(
|
||||
embedding_response.model_dump(mode="json"))
|
||||
embedding_response.model_dump(mode="json")
|
||||
)
|
||||
|
||||
assert embeddings.id is not None
|
||||
assert len(embeddings.data) == 1
|
||||
@@ -433,8 +443,10 @@ async def test_chunked_processing_response_format(
|
||||
# Verify embedding vector properties
|
||||
embedding_vector = embeddings.data[0].embedding
|
||||
import math
|
||||
|
||||
vector_norm = math.sqrt(sum(x * x for x in embedding_vector))
|
||||
# Check that the vector is normalized
|
||||
# (default behavior for most embedding models)
|
||||
assert 0.8 < vector_norm < 1.2, (
|
||||
f"Vector norm should be reasonable, actual: {vector_norm}")
|
||||
f"Vector norm should be reasonable, actual: {vector_norm}"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user