[Frontend] Use new Renderer for Completions and Tokenize API (#32863)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-31 20:51:15 +08:00
parent 8980001c93
commit f0a1c8453a
64 changed files with 2116 additions and 2003 deletions
--- a/tests/entrypoints/pooling/basic/test_truncation.py
+++ b/tests/entrypoints/pooling/basic/test_truncation.py
@@ -67,20 +67,6 @@ async def test_smaller_truncation_size(client: openai.AsyncOpenAI):
    assert response["usage"]["prompt_tokens"] == truncation_size


-@pytest.mark.asyncio
-async def test_zero_truncation_size(client: openai.AsyncOpenAI):
-    truncation_size = 0
-    kwargs: dict[str, Any] = {
-        "model": MODEL_NAME,
-        "input": input,
-        "truncate_prompt_tokens": truncation_size,
-    }
-
-    response = await client.post(path="embeddings", cast_to=object, body={**kwargs})
-
-    assert response["usage"]["prompt_tokens"] == truncation_size
-
-
@pytest.mark.asyncio
 async def test_bigger_truncation_size(client: openai.AsyncOpenAI):
    truncation_size = max_model_len + 1