[Frontend] Use new Renderer for Completions and Tokenize API (#32863)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-31 20:51:15 +08:00
parent 8980001c93
commit f0a1c8453a
64 changed files with 2116 additions and 2003 deletions
--- a/tests/entrypoints/llm/test_chat.py
+++ b/tests/entrypoints/llm/test_chat.py
@@ -205,7 +205,7 @@ def test_chat_batch_failure_cleanup(llm_for_failure_test):
        valid_msg,
    ]
    sampling_params = SamplingParams(temperature=0, max_tokens=10)
-    with pytest.raises(ValueError, match="longer than the maximum model length"):
+    with pytest.raises(ValueError, match="context length is only"):
        llm.chat(batch_1, sampling_params=sampling_params)
    outputs_2 = llm.chat(batch_2, sampling_params=sampling_params)
    assert len(outputs_2) == len(batch_2)