[Frontend] Use new Renderer for Completions and Tokenize API (#32863)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-31 20:51:15 +08:00
parent 8980001c93
commit f0a1c8453a
64 changed files with 2116 additions and 2003 deletions
--- a/tests/entrypoints/pooling/basic/test_truncation.py
+++ b/tests/entrypoints/pooling/basic/test_truncation.py
@@ -67,20 +67,6 @@ async def test_smaller_truncation_size(client: openai.AsyncOpenAI):
    assert response["usage"]["prompt_tokens"] == truncation_size


-@pytest.mark.asyncio
-async def test_zero_truncation_size(client: openai.AsyncOpenAI):
-    truncation_size = 0
-    kwargs: dict[str, Any] = {
-        "model": MODEL_NAME,
-        "input": input,
-        "truncate_prompt_tokens": truncation_size,
-    }
-
-    response = await client.post(path="embeddings", cast_to=object, body={**kwargs})
-
-    assert response["usage"]["prompt_tokens"] == truncation_size
-
-
 @pytest.mark.asyncio
 async def test_bigger_truncation_size(client: openai.AsyncOpenAI):
    truncation_size = max_model_len + 1
--- a/tests/entrypoints/pooling/classify/test_online.py
+++ b/tests/entrypoints/pooling/classify/test_online.py
@@ -128,12 +128,10 @@ def test_empty_input_error(server: RemoteOpenAIServer, model_name: str):
        server.url_for("classify"),
        json={"model": model_name, "input": []},
    )
-    classification_response.raise_for_status()
-    output = ClassificationResponse.model_validate(classification_response.json())

-    assert output.object == "list"
-    assert isinstance(output.data, list)
-    assert len(output.data) == 0
+    error = classification_response.json()
+    assert classification_response.status_code == 400
+    assert "error" in error


@pytest.mark.parametrize("model_name", [MODEL_NAME])
--- a/tests/entrypoints/pooling/score/test_online_score.py
+++ b/tests/entrypoints/pooling/score/test_online_score.py
@@ -247,7 +247,7 @@ class TestModel:
            },
        )
        assert score_response.status_code == 400
-        assert "Please, select a smaller truncation size." in score_response.text
+        assert "Please request a smaller truncation size." in score_response.text

    def test_invocations(self, server: RemoteOpenAIServer, model: dict[str, Any]):
        queries = "What is the capital of France?"